In [1]:
import pickle
import pandas as pd 
import numpy as np

map_filename = './data_parl_map.pickle'
district_era_filename = './district_eras_map.pickle'


In [2]:
class RidingObject:
    def __init__(self, name):
        self.name = name
        self.eras = []
        self.elections = []

class Era:
    def __init__(self, start, end, predecessors, successors):
        self.start = start
        self.end = end
        self.predecessors = predecessors
        self.successors = successors

    def add_dates(self, start, end):
        self.start = start
        self.end = end

    def add_predecessors (self, predecessors):
        self.predecessors = predecessors

    def add_successors(self, successors):
        self.successors = successors

In [3]:
with open(map_filename, 'rb') as handle:
    pickle_map = pickle.load(handle)
print(list(pickle_map.items()))

[('Bonavista—Twillingate', 'Bonavista--Twillingate'), ('Burin—Burgeo', 'Burin--Burgeo'), ('Grand Falls—White Bay', 'Grand Falls--White Bay'), ("Humber—St. George's", "Humber--St. George's"), ("St. John's East", "St. John's East"), ("St. John's West", "St. John's West"), ('Trinity—Conception', 'Trinity--Conception'), ('Annapolis—Kings', 'Annapolis--Kings'), ('Antigonish—Guysborough', 'Antigonish--Guysborough'), ('Cape Breton North and Victoria', 'Cape Breton North and Victoria'), ('Cape Breton South (federal electoral district)', 'Cape Breton South'), ('Colchester—Hants', 'Colchester--Hants'), ('Cumberland (electoral district)', 'Cumberland'), ('Digby—Yarmouth', 'Digby--Yarmouth'), ('Halifax (electoral district)', 'Halifax'), ('Inverness—Richmond', 'Inverness--Richmond'), ('Lunenburg (electoral district)', 'Lunenburg'), ('Pictou (electoral district)', 'Pictou'), ('Queens—Shelburne', 'Queens--Shelburne'), ("King's (Prince Edward Island electoral district)", "King's"), ('Prince (electoral

In [4]:
with open(district_era_filename, 'rb') as handle:
    scraped_data_map = pickle.load(handle)
print(list(scraped_data_map.items()))

[('Vancouver Centre', <__main__.RidingObject object at 0x7f02901c7520>)]


In [5]:
filename = "./federal_election_data.csv"

elec_data = pd.read_csv(filename, sep=',', delimiter=None, header='infer')
elec_data = elec_data[:5000]
elec_data['score'] = np.zeros(len(elec_data))
print(elec_data.columns)
#print(pickle_map['vancouver centre'])
 
relevant_data = elec_data.loc[elec_data['riding'] == 'vancouver centre']
print(relevant_data)


Index(['riding', 'electiondate', 'provincecode', 'lastname', 'firstname',
       'parliament', 'byelec', 'other', 'lib', 'ndp', 'green', 'bloc',
       'allcons', 'won', 'score'],
      dtype='object')
                riding electiondate  provincecode  lastname         firstname  \
3814  vancouver centre   1921-12-06             2    BATSON  CADWALLADERFLAGG   
3815  vancouver centre   1921-12-06             2   STEVENS      HENRYHERBERT   
3816  vancouver centre   1921-12-06             2      GALE       ROBERTHENRY   
3817  vancouver centre   1921-12-06             2  O'CONNOR            THOMAS   
3850  vancouver centre   1925-10-29             2   LEFEAUX      WALLISWALTER   
...                ...          ...           ...       ...               ...   
4954  vancouver centre   1972-10-30             2    AUGUST            ARNOLD   
4955  vancouver centre   1972-10-30             2  MCDONALD              JOHN   
4956  vancouver centre   1972-10-30             2     DODGE          

In [6]:
election_dates = relevant_data['electiondate']
print(len(election_dates))
print(set(election_dates))


77
{'1926-09-14', '1935-10-14', '1958-03-31', '1962-06-18', '1949-06-27', '1965-11-08', '1945-06-11', '1940-03-26', '1930-07-28', '1963-04-08', '1948-06-08', '1972-10-30', '1921-12-06', '1968-06-25', '1957-06-10', '1925-10-29', '1953-08-10'}


In [7]:
import datetime
 
election_dates = list(set(election_dates))
 
election_dates = [datetime.datetime.strptime(date, "%Y-%m-%d") for date in election_dates]
election_dates.sort()
sorted_election_dates = [datetime.datetime.strftime(date, "%Y-%m-%d") for date in election_dates]

print(sorted_election_dates)


['1921-12-06', '1925-10-29', '1926-09-14', '1930-07-28', '1935-10-14', '1940-03-26', '1945-06-11', '1948-06-08', '1949-06-27', '1953-08-10', '1957-06-10', '1958-03-31', '1962-06-18', '1963-04-08', '1965-11-08', '1968-06-25', '1972-10-30']


In [8]:
import numpy as np
 
election_competition = np.zeros_like(election_dates)
winner_scores = np.zeros_like(election_dates)

def compute_score(index, party):
    score = 0
    if (index > 0 and election_competition[index - 1] == party):
        score += 3
    if (index -1 > 0 and election_competition[index - 2] == party):
        score += 2
    if (index -2 > 0 and election_competition[index - 3] == party):
        score += 1
    return score
compute_score_vec = np.vectorize(compute_score)
 
for idx, election_date in enumerate(sorted_election_dates):
    election_data = relevant_data.loc[relevant_data['electiondate'] == election_date]
    #print('############################################################')
    #print(election_data)
    candidates_data = np.array(election_data.iloc[:, 7:-2])
    candidate_parties = np.argmax(candidates_data, axis=1)
    results_data = np.array(election_data.iloc[:, -1:])

    winner = np.argmax(results_data)
    winner_party = candidate_parties[winner]
    election_competition[idx] = winner_party

    candidate_scores = compute_score_vec(idx, candidate_parties)
    relevant_data.loc[relevant_data['electiondate'] == election_date, 'score'] = candidate_scores
    print(relevant_data[relevant_data['electiondate'] == election_date])


In [9]:
"""
0 - other 
1 - lib
2 - ndp
3 - green
4 - bloc
5 - cons
"""


def get_score(index):
    if (index < 0):
        return -1
    else:
        return election_competition[idx]
 
print(election_competition)

# For each index
# Get score for previous elections

[0 0 1 5 0 1 0 0 0 0 0 5 5 2 0 5 1]


In [83]:
print(relevant_data)
print(elec_data.loc[elec_data['riding'] == 'vancouver centre'])

riding electiondate  provincecode  lastname         firstname  \
3814  vancouver centre   1921-12-06             2    BATSON  CADWALLADERFLAGG   
3815  vancouver centre   1921-12-06             2   STEVENS      HENRYHERBERT   
3816  vancouver centre   1921-12-06             2      GALE       ROBERTHENRY   
3817  vancouver centre   1921-12-06             2  O'CONNOR            THOMAS   
3850  vancouver centre   1925-10-29             2   LEFEAUX      WALLISWALTER   
...                ...          ...           ...       ...               ...   
4954  vancouver centre   1972-10-30             2    AUGUST            ARNOLD   
4955  vancouver centre   1972-10-30             2  MCDONALD              JOHN   
4956  vancouver centre   1972-10-30             2     DODGE               RAY   
4957  vancouver centre   1972-10-30             2   JOHNSON              RONK   
4958  vancouver centre   1972-10-30             2   FEDORUK        DANIELIVAN   

      parliament  byelec  other  lib  ndp  

In [11]:
# COMPETITIVENESS COMPUTATION
"""
-Need data on the previous 3 elections

At each election points are awarded for having won preceding elections in that riding

Election | points
-------------------
N - 1 |   3
-------------------
N - 2 |   2
-------------------
N - 3 |   1

"""

'\n-Need data on the previous 3 elections\n\nAt each election points are awarded for having won preceding elections in that riding\n\nElection | points\n-------------------\nN - 1 |   3\n-------------------\nN - 2 |   2\n-------------------\nN - 3 |   1\n\n'

In [12]:
# must be done chronologically
# for each election date in order

In [13]:
# Get example of recent riding

In [14]:
# For election in riding
    # Obtain relevant preceding election data
    # For candidate in election
    # 

In [31]:
# Run for all dates in current csv

# HAVE TO DEAL WITH BYELECTIONS
def get_initialization(riding_name):
    relevant_data = elec_data.loc[elec_data['riding'] == riding_name]
    election_dates = relevant_data['electiondate']
    election_dates = list(set(election_dates))
 
    election_dates = [datetime.datetime.strptime(date, "%Y-%m-%d") for date in election_dates]
    election_dates.sort()
    sorted_election_dates = [datetime.datetime.strftime(date, "%Y-%m-%d") for date in election_dates]

    return sorted_election_dates, np.zeros_like(election_dates)

# Create dict to hold past election data for each riding
riding_names = list(set(elec_data['riding']))
score_dict = {}

bad = 0
valid_riding_names = []
for riding_name in riding_names:
    try:
        score_dict[riding_name] = (get_initialization(riding_name))
        valid_riding_names.append(riding_name)
    except:
        bad += 1
print(bad)




104


In [114]:
def compute_score(index, party, history):
    score = 0
    if (index > 0 and history[index - 1] == party):
        score += 3
    if (index -1 > 0 and history[index - 2] == party):
        score += 2
    if (index -2 > 0 and history[index - 3] == party):
        score += 1
    return score

compute_score_vec = np.vectorize(compute_score)

def score_riding(riding_name):
    #election_dates = score_dict[riding_name][0]
    relevant_data = elec_data.loc[elec_data['riding'] == riding_name]
    #print(relevant_data)

    sorted_election_dates, election_competition = score_dict[riding_name]
    print(election_competition)

    for idx, election_date in enumerate(sorted_election_dates):
        election_data = relevant_data.loc[relevant_data['electiondate'] == election_date]
        print('############################################################')
        #print(idx)
        print(election_data)
        candidates_data = np.array(election_data.iloc[:, 7:-2])
        candidate_parties = np.argmax(candidates_data, axis=1)
        results_data = np.array(election_data.iloc[:, -1:])

        winner = np.argmax(results_data)
        winner_party = candidate_parties[winner]
        election_competition[idx] = winner_party
        #print(election_competition)

        candidate_scores = np.array([compute_score(idx, candidate_party, election_competition) for candidate_party in candidate_parties])
        print(candidate_scores)
        elec_data.loc[np.logical_and(elec_data['riding'] == riding_name, elec_data['electiondate'] == election_date), 'score'] = candidate_scores
        print(relevant_data.loc[np.logical_and(elec_data['riding'] == riding_name, relevant_data['electiondate'] == election_date), 'score'])
        # update data
    score_dict[riding_name] = (sorted_election_dates, election_competition)
    #input()
    


for riding_name in valid_riding_names:
    #try:
    score_riding(riding_name)
    #except:
    #   bad += 1
print(bad)

[1]
############################################################
                   riding electiondate  provincecode lastname firstname  \
3267  calgary signal hill   2015-10-19             1   CUNDAL     KERRY   
3268  calgary signal hill   2015-10-19             1      RAU     JESSE   
3269  calgary signal hill   2015-10-19             1  LIEPERT       RON   
3270  calgary signal hill   2015-10-19             1    AHMED    KHALIS   
3271  calgary signal hill   2015-10-19             1     MOEN       TIM   
3272  calgary signal hill   2015-10-19             1  KNORREN     TARYN   

      parliament  byelec  other  lib  ndp  green  bloc  allcons  won  score  
3267          42       0      0    1    0      0     0        0    0    0.0  
3268          42       0      1    0    0      0     0        0    0    0.0  
3269          42       0      0    0    0      0     0        1    1    0.0  
3270          42       0      0    0    1      0     0        0    0    0.0  
3271          42   

In [115]:
print(elec_data)

riding electiondate  provincecode  \
0                Yukon  -1642291200             0   
1                Yukon  -1642291200             0   
2                Yukon  -1839196800             0   
3                Yukon  -1839196800             0   
4                Yukon  -1930780800             0   
...                ...          ...           ...   
4995  burnaby--seymour   1974-07-08             2   
4996  burnaby--seymour   1974-07-08             2   
4997  burnaby--seymour   1974-07-08             2   
4998  burnaby--seymour   1974-07-08             2   
4999          capilano   1974-07-08             2   

                         lastname firstname  parliament  byelec  other  lib  \
0                Thompson, Alfred       NaN          13       0      1    0   
1     Congdon, Frederick Tennyson       NaN          13       0      1    0   
2                Thompson, Alfred       NaN          12       0      0    0   
3     Congdon, Frederick Tennyson       NaN          12       0

In [116]:
print(elec_data[elec_data['score'] != 0])

riding electiondate  provincecode    lastname  \
28                       yukon   1926-09-14             0       BLACK   
29                       yukon   1930-07-28             0       BLACK   
30                       yukon   1930-07-28             0    THOMPSON   
31                       yukon   1935-10-14             0       SMITH   
33                       yukon   1940-03-26             0        REID   
...                        ...          ...           ...         ...   
4988  burnaby--richmond--delta   1974-07-08             2     WALLACE   
4993          burnaby--seymour   1974-07-08             2      DOUCET   
4994          burnaby--seymour   1974-07-08             2       WAUGH   
4997          burnaby--seymour   1974-07-08             2       MALEY   
4999                  capilano   1974-07-08             2  RICHARDSON   

          firstname  parliament  byelec  other  lib  ndp  green  bloc  \
28           GEORGE          16       0      0    0    0      0     0   
2

In [117]:
output_filename = 'scored_ridings.csv'

elec_data.to_csv(output_filename)