# Data Cleaning and EDA

## Initial Overview


### Read in elections contribution data

In [65]:
# libraries
import pandas as pd
import numpy as np
import re
from rapidfuzz import process

In [147]:
# read in the csv
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so a column cannot end up with mixed types.
# NOTE(review): the truncated warning left in this cell's output looks like a
# pandas DtypeWarning (mixed types) - confirm it disappears with this option.
df = pd.read_csv('data/contributions/od_cntrbtn_audt_e.csv', low_memory=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [34]:
# preview the first rows to sanity-check that the file loaded as expected
df.head()

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Recipient middle initial,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,...,Contributor first name,Contributor middle initial,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount,Non-Monetary amount,Contribution given through,Leadership contestant
0,﻿Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,...,DONAT,,Magog,QC,J1X 2C3,,400.0,0.0,,
1,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,...,REAL,,Sherbrooke,QC,J1L 2B6,,500.0,0.0,,
2,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,...,ANDRE,,Ascot,QC,J1K 3B4,,500.0,0.0,,
3,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,...,VIOLETTE,,Shebrooke,QC,J1H 4J9,,2500.0,0.0,,
4,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,...,JEAN,,Katevale,QC,J0B 1W0,,500.0,0.0,,


In [148]:
# list the column names available in the raw contributions data
df.columns

Index(['Political Entity', 'Recipient ID', 'Recipient', 'Recipient last name',
       'Recipient first name', 'Recipient middle initial',
       'Political Party of Recipient', 'Electoral District', 'Electoral event',
       'Fiscal/Election date', 'Form ID', 'Financial Report',
       'Part Number of Return', 'Financial Report part', 'Contributor type',
       'Contributor name', 'Contributor last name', 'Contributor first name',
       'Contributor middle initial', 'Contributor City',
       'Contributor Province', 'Contributor Postal code',
       'Contribution Received date', 'Monetary amount', 'Non-Monetary amount',
       'Contribution given through', 'Leadership contestant'],
      dtype='object')

In [149]:
# check whether any contribution amount is missing (True would mean at least one NaN);
# missing amounts are unlikely to be something we could fill in
df['Monetary amount'].isna().any()

False

- No contribution amounts are missing, so there are no gaps to fill in.

In [150]:
# drop unneeded columns
# The result is re-assigned (no inplace=True) and columns that are already
# gone are ignored, so running this cell a second time is a no-op instead of
# raising a KeyError.
df = df.drop(
    columns=[
        'Recipient middle initial', 'Form ID', 'Financial Report',
        'Part Number of Return', 'Financial Report part', 'Contributor type',
        'Contributor name', 'Contributor last name', 'Contributor first name',
        'Contributor middle initial', 'Non-Monetary amount',
        'Contribution given through', 'Leadership contestant',
    ],
    errors='ignore',
)
df

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount
0,﻿Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Magog,QC,J1X 2C3,,400.00
1,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Sherbrooke,QC,J1L 2B6,,500.00
2,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Ascot,QC,J1K 3B4,,500.00
3,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Shebrooke,QC,J1H 4J9,,2500.00
4,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Katevale,QC,J0B 1W0,,500.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5815958,Registered parties,50874,Free Party Canada,Free Party Canada,,Free Party Canada,,Annual,2022-12-31,,,,,380.00
5815959,Registered parties,44892,Free Party Canada,Free Party Canada,,Free Party Canada,,Annual,2021-12-31,,,,,27946.30
5815960,Registered parties,44892,Free Party Canada,Free Party Canada,,Free Party Canada,,Annual,2021-12-31,,,,,966.50
5815961,Registered parties,50876,Maverick Party,Maverick Party,,Maverick Party,,Annual,2022-12-31,,,,,43139.12


### Election events

The best way to join the StatsCan electoral outcome datasets is to identify which elections are needed and split the contributions into one dataset per election.

In [151]:
# list every distinct electoral event label present in the contributions data
df['Electoral event'].unique()

array(['38th general election', '39th general election',
       'November 27, 2006 By-elections', 'May 27, 2004, By-election',
       'May 24, 2005 By-election', 'March 17, 2008 By-elections',
       '40th general election', 'September 17, 2007 By-elections',
       'September 8, 2008 By-elections', '41st general election',
       'November 26, 2012 By-elections', 'September 22, 2008 By-election',
       'November 9, 2009 By-elections', 'November 29, 2010 By-elections',
       'March 19, 2012 By-election', 'June 30, 2014 By-elections',
       'November 25, 2013 By-elections', 'May 13, 2013 By-election',
       'November 17, 2014 By-elections', '42nd general election',
       'October 24, 2016 By-election', 'April 3, 2017 By-elections',
       'May 6, 2019 By-election', 'October 23, 2017 By-elections',
       'February 25, 2019, By-elections',
       'December 11, 2017, By-elections', 'June 18, 2018, By-election',
       'December 3, 2018, By-election', 'October 19, 2015 By-elections',


Note that there are A LOT of electoral events above. For now we focus on the general elections; by-election results are also available on the Elections Canada website.

In [None]:
## 44th election example
# boolean-mask filter: keep only the rows whose electoral event is the 44th general election
df[df['Electoral event'] == '44th general election']

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount
118278,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,Membertou,NS,B1S 3K8,2021-09-08,300.00
118279,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,Coxhealth,NS,B1R 1T8,2021-09-05,500.00
118280,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,Membertou,NS,B1S 0K2,2021-09-15,300.00
118281,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,North York,ON,M2M 2W2,2021-09-11,215.00
118282,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,Dutch Brook,NS,B1L 1E9,2021-09-19,400.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135436,Candidates,48462,"Zimmer, Katelyn",Zimmer,Katelyn,Liberal Party of Canada,Moose Jaw--Lake Centre--Lanigan,44th general election,2021-09-20,,,,,90.00
135437,Candidates,48463,"Yeh, Teresa",Yeh,Teresa,New Democratic Party,Brampton North,44th general election,2021-09-20,,,,,56.79
135438,Candidates,48465,"Dwyer, Madelaine",Dwyer,Madelaine,New Democratic Party,Charleswood--St. James--Assiniboia--Headingley,44th general election,2021-09-20,,,,,250.00
135439,Candidates,48470,"Niles, Tyler",Niles,Tyler,People's Party of Canada,Mission--Matsqui--Fraser Canyon,44th general election,2021-09-20,,,,,600.00


In [None]:
# get district codes for each district for better key matching
# (read as latin-1; this table is the one merged onto the 42nd-44th general
# elections further down - the earlier elections use a separate code table)
district_codes_df = pd.read_csv('data/ED-Canada_2021.csv', encoding='latin-1')
district_codes_df


Unnamed: 0,ED_CODE,ED_NAMEE,ED_NAMEF,POPULATION
0,10001,Avalon,Avalon,87191
1,10002,Bonavista--Burin--Trinity,Bonavista--Burin--Trinity,71898
2,10003,Coast of Bays--Central--Notre Dame,Coast of Bays--Central--Notre Dame,74201
3,10004,Labrador,Labrador,26655
4,10005,Long Range Mountains,Long Range Mountains,81716
...,...,...,...,...
333,59041,Victoria,Victoria,123482
334,59042,West Vancouver--Sunshine Coast--Sea to Sky Cou...,West Vancouver--Sunshine Coast--Sea to Sky Cou...,131206
335,60001,Yukon,Yukon,40232
336,61001,Northwest Territories,Territoires du Nord-Ouest,41070


In [257]:
# note that the 38th - 41st general elections used different districts, as redistricting was done in 2012
# will pull from the 40th general elections list
election_40_df = pd.read_csv('data/stats_can_general_elections/40_general_election_11.csv', encoding='latin-1')

# grab the relevant columns, rename the district number column to ED_CODE and
# store it as an integer.
# Doing this as one chain yields an independent DataFrame. Modifying a
# column-subset of the raw table in place (rename(inplace=True) / column
# assignment) is what makes pandas emit SettingWithCopyWarning, both here and
# when another column is added to this frame later on.
earlier_district_codes_df = (
    election_40_df[['Electoral District Name/Nom de circonscription',
                    'Electoral District Number/Numéro de circonscription']]
    .rename(columns={'Electoral District Number/Numéro de circonscription': 'ED_CODE'})
    .astype({'ED_CODE': int})
)

# save as a csv for future reference
earlier_district_codes_df.to_csv('data/ed_codes_pre_2012.csv')
earlier_district_codes_df

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE
0,Avalon,10001
1,Bonavista--Gander--Grand Falls--Windsor,10002
2,Humber--St. Barbe--Baie Verte,10003
3,Labrador,10004
4,Random--Burin--St. George's,10005
...,...,...
303,Victoria,59035
304,West Vancouver--Sunshine Coast--Sea to Sky Cou...,59036
305,Yukon,60001
306,Western Arctic,61001


In [258]:
# create a function to clean the district names
def clean_text(text):
    """Normalise a district name so it can be used as a merge key.

    The name is lower-cased, stripped of surrounding whitespace, and every
    character that is neither a word character nor whitespace is replaced by
    a single space (so 'Sydney--Victoria' becomes 'sydney  victoria').

    Non-string input (for example None, or the NaN pandas uses for a missing
    district) is returned unchanged instead of raising AttributeError, so a
    row without a district does not abort an ``apply`` over the column.
    """
    if not isinstance(text, str):
        return text
    return re.sub(r'[^\w\s]', ' ', text.lower().strip())


# add a normalised district-name column to both code tables; each table keeps
# its district names under a different column label
for codes_table, name_column in (
    (district_codes_df, 'ED_NAMEE'),
    (earlier_district_codes_df, 'Electoral District Name/Nom de circonscription'),
):
    codes_table['cleaned_district_name'] = codes_table[name_column].apply(clean_text)


In [259]:
# process all general elections
# one CSV per election is written, mostly for reference/isolation as needed
earlier_elections_list = ['38th general election', '39th general election', '40th general election',
                          '41st general election']

later_elections_list = ['42nd general election','43rd general election', '44th general election']

# the two groups of elections need different sets of electoral codes, so each
# list is paired with its own code table and handled by one nested loop
for elections, codes_df in (
    (earlier_elections_list, earlier_district_codes_df),
    (later_elections_list, district_codes_df),
):
    for election in elections:
        # contributions recorded for this specific election
        election_df = df[df['Electoral event'] == election].copy()

        # normalise the district names, then left-merge to attach the codes
        election_df['cleaned_district_name'] = election_df['Electoral District'].apply(clean_text)
        code_lookup = codes_df[['cleaned_district_name', 'ED_CODE']].copy()
        merged_df = election_df.merge(code_lookup, on= 'cleaned_district_name', how='left')

        # the temporary name column is kept for now; it might help when merging election outcomes

        # save to csv
        merged_df.to_csv(f'data/contributions/general_elections/{election}.csv')


### StatsCan/Elections Canada Data

### Initial preprocessing and function set up

In [200]:
# load the 44th general election results table; the cells below use this
# frame to develop and check the helper functions
stats_df = pd.read_csv('data/stats_can_general_elections/44_general_election_12.csv')
stats_df

Unnamed: 0,Province,Electoral District Name/Nom de circonscription,Electoral District Number/Numéro de circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité
0,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Ken McDonald ** Liberal/Libéral,"Conception Bay South, N.L./ T.-N.-L.",Parliamentarian/Parlementaire,18608,50.1,5870.0,15.8
1,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Matthew Chapman Conservative/Conservateur,"Paradise, N.L./ T.-N.-L.",Teacher/Enseignant,12738,34.3,,
2,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Carolyn Davis NDP-New Democratic Party/NPD-Nou...,"Portugal Cove-St. Philip's, N.L./ T.-N.-L.",Claims Representative/Représentante en réclama...,5151,13.9,,
3,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Lainie Stewart People's Party - PPC/Parti popu...,"Mount Pearl, N.L./ T.-N.-L.",Business Owner/Propriétaire d'entreprise,647,1.7,,
4,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Burin--Trinity,10002,Churence Rogers ** Liberal/Libéral,"Centreville, N.L./ T.-N.-L.",Parliamentarian/Parlementaire,13972,46.6,1694.0,5.6
...,...,...,...,...,...,...,...,...,...,...
2005,Northwest Territories/Territoires du Nord-Ouest,Northwest Territories/Territoires du Nord-Ouest,61001,Jane Groenewegen Independent/Indépendant(e),"Hay River, N.W.T./ T.N.-O.",Business Woman/Femme d'affaires,1791,12.7,,
2006,Northwest Territories/Territoires du Nord-Ouest,Northwest Territories/Territoires du Nord-Ouest,61001,Roland Laufer Green Party/Parti Vert,"Yellowknife, N.W.T./ T.N.-O.",Store Manager/Directeur de magasin,328,2.3,,
2007,Nunavut,Nunavut,62001,Lori Idlout NDP-New Democratic Party/NPD-Nouve...,"Iqaluit, Nun./ Nt",Lawyer/Avocate,3427,47.7,849.0,11.8
2008,Nunavut,Nunavut,62001,Pat Angnakak Liberal/Libéral,"Iqaluit, Nun./ Nt",Parliamentarian/Parlementaire,2578,35.9,,


In [201]:
# helper that adds the outcome flag and drops the columns we do not need
def add_drop_cols(df):
    """Flag the winners and return the results table without unneeded columns.

    A boolean 'won_election' column is written onto the passed-in frame
    itself (True wherever 'Majority/Majorité' is not null), so the caller's
    DataFrame gains that column as a side effect. The returned frame is a new
    object that carries 'won_election' but none of the columns listed in
    ``unneeded_cols``.
    """
    unneeded_cols = [
        'Province',
        'Candidate Residence/Résidence du candidat',
        'Candidate Occupation/Profession du candidat',
        'Votes Obtained/Votes obtenus',
        'Majority/Majorité',
        'Majority Percentage/Pourcentage de majorité',
    ]
    # only rows with a majority value are flagged as winners
    df['won_election'] = ~df['Majority/Majorité'].isna()
    return df.drop(columns=unneeded_cols)

# NOTE: the trimmed frame returned here is only displayed, not assigned;
# stats_df itself keeps all of its columns and just gains 'won_election',
# because add_drop_cols writes that column onto the frame it is given
add_drop_cols(stats_df)

Unnamed: 0,Electoral District Name/Nom de circonscription,Electoral District Number/Numéro de circonscription,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election
0,Avalon,10001,Ken McDonald ** Liberal/Libéral,50.1,True
1,Avalon,10001,Matthew Chapman Conservative/Conservateur,34.3,False
2,Avalon,10001,Carolyn Davis NDP-New Democratic Party/NPD-Nou...,13.9,False
3,Avalon,10001,Lainie Stewart People's Party - PPC/Parti popu...,1.7,False
4,Bonavista--Burin--Trinity,10002,Churence Rogers ** Liberal/Libéral,46.6,True
...,...,...,...,...,...
2005,Northwest Territories/Territoires du Nord-Ouest,61001,Jane Groenewegen Independent/Indépendant(e),12.7,False
2006,Northwest Territories/Territoires du Nord-Ouest,61001,Roland Laufer Green Party/Parti Vert,2.3,False
2007,Nunavut,62001,Lori Idlout NDP-New Democratic Party/NPD-Nouve...,47.7,True
2008,Nunavut,62001,Pat Angnakak Liberal/Libéral,35.9,False


In [418]:
# set up dictionary of party names and variations to check the party
# Keys are lower-case party labels; each value lists the lower-case spellings
# and abbreviations accepted for that label.
# NOTE(review): very short aliases such as "up", "bq" or "cap" could score well
# against unrelated text under fuzzy matching - worth confirming.
parties_dict = {
    "animal protection party of canada": [
        "animal protection party of canada",
        "animal protection party"
    ],
    "bloc québécois": [
        "bloc québécois",
        "bloc quebecois",
        "bloc",
        "bq"
    ],
    "canadian future party": [
        "canadian future party"
    ],
    "centrist party of canada": [
        "centrist party of canada",
        "centrist party"
    ],
    "christian heritage party of canada": [
        "christian heritage party of canada",
        "christian heritage party",
        "chp"
    ],
    "communist party of canada": [
        "communist party of canada",
        "communist party"
    ],
    "conservative": [
        "conservative",
        "conservative party",
        "conservative party of canada",
        "cpc"
    ],
    "green": [
        "green",
        "green party",
        "green party of canada",
        "green party/parti vert",
        "parti vert"
    ],
    "liberal": [
        "liberal",
        "liberal party",
        "liberal party of canada",
        "lpc"
    ],
    "libertarian party of canada": [
        "libertarian party of canada",
        "libertarian party"
    ],
    "marijuana party": [
        "marijuana party"
    ],
    "marxist-leninist party of canada": [
        "marxist-leninist party of canada",
        "marxist leninist party",
        "marxist leninist"
    ],
    "ndp": [
        "ndp",
        "new democratic party",
        "ndp - new democratic party",
        "ndp-new democratic party",
        "ndp/new democratic party"
    ],
    "parti rhinocéros party": [
        "parti rhinocéros party",
        "rhinoceros party",
        "parti rhinoceros"
    ],
    "united party of canada (up)": [
        "united party of canada",
        "up",
        "united party of canada (up)"
    ],
    "people's party of canada": [
        "people's party of canada",
        "people's party",
        "people's party - ppc",
        "peoples party",
        "peoples party of canada",
        "peoples party ppc"
    ],
    "independent": [
        "independent",
        "indépendant",
        "indépendante",
        "independent/indépendant",
        "independent/indépendante",
        "indépendant(e)"
    ],
    # catch-all bucket: remaining party labels/abbreviations and the
    # "no affiliation" variants
    # NOTE(review): "chp canada" is listed here while "chp" sits under the
    # Christian Heritage Party above - confirm that is intended
    "other parties":[
        "pour l'indépendance du québec",
        "vcp",
        "cac",
        "vcp/cac",
        "canada's fourth front",
        "qfc",
        "qfc/canada's fourth front",
        "no affiliation",
        "aucune affiliation",
        "aucune appartenance",
        "national citizens alliance",
        "nca",
        "nationalist",
        "nationaliste",
        "parti patriote",
        "canadian action",
        "action canadienne",
        "ml/ml",
        "stop climate change",
        "democratic advancement",
        "forces et démocratie",
        "atn",
        "pirate",
        "pirate party",
        "cap",
        "pac",
        "pact",
        "the bridge",
        "fpnp",
        "wbp",
        "chp canada",
        "ppp",
        "neorhino.ca"
    ]
}

# a quick basic text cleaner for party labels (similar to clean_text, the one
# used for district names, except punctuation is removed rather than replaced by a space)
def clean_text_parties(text):
    """Lower-case and trim ``text``, then delete every character that is
    neither a word character nor whitespace."""
    normalised = text.strip().lower()
    return re.sub(r'[^\w\s]', '', normalised)

# flatten every list of spellings in the dict into one flat list to use:
parties_list = [alias for aliases in parties_dict.values() for alias in aliases]

# quick display to look at list
parties_list

['animal protection party of canada',
 'animal protection party',
 'bloc québécois',
 'bloc quebecois',
 'bloc',
 'bq',
 'canadian future party',
 'centrist party of canada',
 'centrist party',
 'christian heritage party of canada',
 'christian heritage party',
 'chp',
 'communist party of canada',
 'communist party',
 'conservative',
 'conservative party',
 'conservative party of canada',
 'cpc',
 'green',
 'green party',
 'green party of canada',
 'green party/parti vert',
 'parti vert',
 'liberal',
 'liberal party',
 'liberal party of canada',
 'lpc',
 'libertarian party of canada',
 'libertarian party',
 'marijuana party',
 'marxist-leninist party of canada',
 'marxist leninist party',
 'marxist leninist',
 'ndp',
 'new democratic party',
 'ndp - new democratic party',
 'ndp-new democratic party',
 'ndp/new democratic party',
 'parti rhinocéros party',
 'rhinoceros party',
 'parti rhinoceros',
 'united party of canada',
 'up',
 'united party of canada (up)',
 "people's party of can

In [243]:
# function to separate out the party, and the name to match closer to the contributions dataframe

def get_candidate_names(text, parties_list, min_score=85):
    """Extract the candidate's name from a combined 'name + party' string.

    The input is expected to look like 'Ken McDonald ** Liberal/Libéral':
    the candidate's name, an optional '**' marker, the English party label
    and, after the last '/', the French party label.

    Parameters
    ----------
    text : str
        Raw value of the 'Candidate/Candidat' column.
    parties_list : list of str
        Known party spellings that the trailing words are fuzzy-matched against.
    min_score : float, default 85
        Score (as returned by ``process.extractOne``) that the best match
        must exceed for the split to be accepted.

    Returns
    -------
    str or None
        The words preceding the best-matching party label, or None when the
        input is not a string or no trailing word sequence matches a known
        party well enough. None values are meant to be flagged for a manual
        check.
    """
    # string check, return None for manual check
    if not isinstance(text, str):
        return None

    # remove the stars with regex
    text = re.sub(r'\*\*', '', text).strip()

    # keep everything before the last '/' (name + English party). Indexing the
    # rsplit result instead of unpacking it means a value without any '/' is
    # still processed rather than raising ValueError.
    before_slash = text.rsplit('/', 1)[0]

    # try every split point: words[:i] would be the name, words[i:] the party
    words = before_slash.strip().split()
    best_score = 0
    best_index = len(words)

    for i in range(1, len(words)):
        potential_party = clean_text_parties(' '.join(words[i:]))
        result = process.extractOne(potential_party, parties_list)
        if result is None:
            # nothing to compare against (e.g. an empty parties_list)
            continue
        score = result[1]
        # strict '>' keeps the earliest split point among equal scores
        if score > best_score:
            best_score = score
            best_index = i

    if best_score > min_score:
        return ' '.join(words[:best_index])

    # no convincing party match: return None so the row is flagged for a manual check
    return None


In [204]:
# derive the candidate name for every row of the results table
# (parties_list is handed to get_candidate_names as its second positional argument)
stats_df['Candidate Name'] = stats_df['Candidate/Candidat'].apply(get_candidate_names, args=(parties_list,))
stats_df

Unnamed: 0,Province,Electoral District Name/Nom de circonscription,Electoral District Number/Numéro de circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité,won_election,Candidate Name
0,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Ken McDonald ** Liberal/Libéral,"Conception Bay South, N.L./ T.-N.-L.",Parliamentarian/Parlementaire,18608,50.1,5870.0,15.8,True,Ken McDonald
1,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Matthew Chapman Conservative/Conservateur,"Paradise, N.L./ T.-N.-L.",Teacher/Enseignant,12738,34.3,,,False,Matthew Chapman
2,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Carolyn Davis NDP-New Democratic Party/NPD-Nou...,"Portugal Cove-St. Philip's, N.L./ T.-N.-L.",Claims Representative/Représentante en réclama...,5151,13.9,,,False,Carolyn Davis
3,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Lainie Stewart People's Party - PPC/Parti popu...,"Mount Pearl, N.L./ T.-N.-L.",Business Owner/Propriétaire d'entreprise,647,1.7,,,False,Lainie Stewart
4,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Burin--Trinity,10002,Churence Rogers ** Liberal/Libéral,"Centreville, N.L./ T.-N.-L.",Parliamentarian/Parlementaire,13972,46.6,1694.0,5.6,True,Churence Rogers
...,...,...,...,...,...,...,...,...,...,...,...,...
2005,Northwest Territories/Territoires du Nord-Ouest,Northwest Territories/Territoires du Nord-Ouest,61001,Jane Groenewegen Independent/Indépendant(e),"Hay River, N.W.T./ T.N.-O.",Business Woman/Femme d'affaires,1791,12.7,,,False,Jane Groenewegen
2006,Northwest Territories/Territoires du Nord-Ouest,Northwest Territories/Territoires du Nord-Ouest,61001,Roland Laufer Green Party/Parti Vert,"Yellowknife, N.W.T./ T.N.-O.",Store Manager/Directeur de magasin,328,2.3,,,False,Roland Laufer
2007,Nunavut,Nunavut,62001,Lori Idlout NDP-New Democratic Party/NPD-Nouve...,"Iqaluit, Nun./ Nt",Lawyer/Avocate,3427,47.7,849.0,11.8,True,Lori Idlout
2008,Nunavut,Nunavut,62001,Pat Angnakak Liberal/Libéral,"Iqaluit, Nun./ Nt",Parliamentarian/Parlementaire,2578,35.9,,,False,Pat Angnakak


In [205]:
# check for any missing candidate names: rows where get_candidate_names
# returned None need a manual check (an empty frame means every row was parsed)
stats_df[stats_df['Candidate Name'].isna()]

Unnamed: 0,Province,Electoral District Name/Nom de circonscription,Electoral District Number/Numéro de circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité,won_election,Candidate Name


In [206]:
def split_name(text):
    """Split a candidate's full name into a first and a last name.

    Returns a two-element pandas Series ``[first, last]`` holding the first
    and the last whitespace-separated word of ``text``; any words in between
    are not kept. ``[None, None]`` is returned when ``text`` is not a string
    (None, NaN, ...) or contains no words, and ``[word, None]`` when it holds
    a single word, so those rows can be flagged for a manual check.
    """
    # anything that is not a string (None, NaN, ...) is flagged for a manual check
    if not isinstance(text, str):
        return pd.Series([None, None])

    names = text.split()
    # empty or whitespace-only string: there is nothing to split
    if not names:
        return pd.Series([None, None])
    # only one word: keep it as the first name and leave the last name empty
    if len(names) == 1:
        return pd.Series([names[0], None])
    return pd.Series([names[0], names[-1]])

# split_name returns a two-element Series per row, so apply() yields two
# columns that are stored as 'First Name' and 'Last Name'
stats_df[['First Name', 'Last Name']] = stats_df['Candidate Name'].apply(split_name)
stats_df

Unnamed: 0,Province,Electoral District Name/Nom de circonscription,Electoral District Number/Numéro de circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité,won_election,Candidate Name,First Name,Last Name
0,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Ken McDonald ** Liberal/Libéral,"Conception Bay South, N.L./ T.-N.-L.",Parliamentarian/Parlementaire,18608,50.1,5870.0,15.8,True,Ken McDonald,Ken,McDonald
1,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Matthew Chapman Conservative/Conservateur,"Paradise, N.L./ T.-N.-L.",Teacher/Enseignant,12738,34.3,,,False,Matthew Chapman,Matthew,Chapman
2,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Carolyn Davis NDP-New Democratic Party/NPD-Nou...,"Portugal Cove-St. Philip's, N.L./ T.-N.-L.",Claims Representative/Représentante en réclama...,5151,13.9,,,False,Carolyn Davis,Carolyn,Davis
3,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,10001,Lainie Stewart People's Party - PPC/Parti popu...,"Mount Pearl, N.L./ T.-N.-L.",Business Owner/Propriétaire d'entreprise,647,1.7,,,False,Lainie Stewart,Lainie,Stewart
4,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Burin--Trinity,10002,Churence Rogers ** Liberal/Libéral,"Centreville, N.L./ T.-N.-L.",Parliamentarian/Parlementaire,13972,46.6,1694.0,5.6,True,Churence Rogers,Churence,Rogers
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2005,Northwest Territories/Territoires du Nord-Ouest,Northwest Territories/Territoires du Nord-Ouest,61001,Jane Groenewegen Independent/Indépendant(e),"Hay River, N.W.T./ T.N.-O.",Business Woman/Femme d'affaires,1791,12.7,,,False,Jane Groenewegen,Jane,Groenewegen
2006,Northwest Territories/Territoires du Nord-Ouest,Northwest Territories/Territoires du Nord-Ouest,61001,Roland Laufer Green Party/Parti Vert,"Yellowknife, N.W.T./ T.N.-O.",Store Manager/Directeur de magasin,328,2.3,,,False,Roland Laufer,Roland,Laufer
2007,Nunavut,Nunavut,62001,Lori Idlout NDP-New Democratic Party/NPD-Nouve...,"Iqaluit, Nun./ Nt",Lawyer/Avocate,3427,47.7,849.0,11.8,True,Lori Idlout,Lori,Idlout
2008,Nunavut,Nunavut,62001,Pat Angnakak Liberal/Libéral,"Iqaluit, Nun./ Nt",Parliamentarian/Parlementaire,2578,35.9,,,False,Pat Angnakak,Pat,Angnakak


In [207]:
# check for missing last names to see if there are any splitting issues
# (split_name leaves the last name empty when it only finds a single word)
stats_df[stats_df['Last Name'].isna()]

Unnamed: 0,Province,Electoral District Name/Nom de circonscription,Electoral District Number/Numéro de circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité,won_election,Candidate Name,First Name,Last Name


In [369]:
# function to join the contributions with the election outcomes
def merge_elections(contribution_df, outcome_df):
    """Inner-join contributions with election outcomes for one election.

    Rows are matched on district code plus the recipient's first and last
    name, so both frames must carry the 'ED_CODE', 'Recipient first name'
    and 'Recipient last name' columns. Helper columns that are no longer
    needed after the join are removed when present; neither input frame is
    modified.
    """
    join_keys = ['ED_CODE','Recipient first name', 'Recipient last name']
    leftover_cols = ['Unnamed: 0','Electoral District/Circonscription', 'Electoral District Name/Nom de circonscription',
                     'Candidate/Candidat', 'cleaned_district_name_y','Candidate Name']

    joined = contribution_df.merge(outcome_df, on=join_keys, how='inner')
    # errors='ignore' because not every election's frames contain all of these columns
    return joined.drop(columns=leftover_cols, errors='ignore')

### Process all elections

#### 38th Election

In [344]:
# Load the two inputs for the 38th general election: the contribution records
# and the results table (the results file is read as Latin-1).
election_contributions_38_df = pd.read_csv(
    'data/contributions/general_elections/38th general election.csv'
)
election_outcomes_38_df = pd.read_csv(
    'data/stats_can_general_elections/38_general_election_12.csv',
    encoding='latin-1',
)

In [345]:
# The 38th election results have no district-number column, so the ED_CODE is
# looked up by normalised district name. The left join keeps every result row;
# districts without a match end up with a missing ED_CODE.
election_outcomes_38_df['cleaned_district_name'] = (
    election_outcomes_38_df['Electoral District/Circonscription'].apply(clean_text)
)
election_outcomes_38_df = pd.merge(
    election_outcomes_38_df,
    earlier_district_codes_df[['cleaned_district_name', 'ED_CODE']],
    on='cleaned_district_name',
    how='left',
)

election_outcomes_38_df

Unnamed: 0,Province,Electoral District/Circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité,cleaned_district_name,ED_CODE
0,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,R. John Efford ** Liberal/Libéral,"Bareneed, N.L./T.-N.-L.",Parliamentarian/Parlementaire,18335,58.34,9124.0,29.0,avalon,10001.0
1,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,Rick Dalton Conservative/conservateur,"Harbour Main, N.L./T.-N.-L.",Business Manager/Directeur des opérations,9211,29.31,,,avalon,10001.0
2,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,Michael Kehoe N.D.P./N.P.D.,"Paradise, N.L./T.-N.-L.",Customer Service Representative/Représentant a...,3450,10.98,,,avalon,10001.0
3,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,Don C. Ferguson Green Party/Parti Vert,"Lethbridge, Alta/Alb.",Professor (Retired)/Professeur (retraité),430,1.37,,,avalon,10001.0
4,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Exploits,Scott Simms Liberal/Libéral,"Bishop's Falls, N.L./T.-N.-L.",Broadcaster/Radiodiffuseur,15970,48.20,2184.0,6.6,bonavista exploits,
...,...,...,...,...,...,...,...,...,...,...,...
1680,Nunavut,Nunavut,Nancy Karetak-Lindell ** Liberal/Libéral,"Arviat, Nnvt",Parliamentarian/Parlementaire,3818,51.30,2646.0,35.6,nunavut,62001.0
1681,Nunavut,Nunavut,Manitok Thompson Independent/Indépendant,"Rankin Inlet, Nnvt",Teacher/Enseignante,1172,15.75,,,nunavut,62001.0
1682,Nunavut,Nunavut,Bill Riddell N.D.P./N.P.D.,"Iqaluit, Nnvt",Mediator/Médiateur,1129,15.17,,,nunavut,62001.0
1683,Nunavut,Nunavut,Duncan Cunningham Conservative/conservateur,"Pond Inlet, Nnvt",Manager/Gestionnaire,1075,14.45,,,nunavut,62001.0


In [346]:
# List the result rows whose district name found no ED_CODE in the lookup
election_outcomes_38_df.loc[election_outcomes_38_df['ED_CODE'].isna()]

Unnamed: 0,Province,Electoral District/Circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité,cleaned_district_name,ED_CODE
4,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Exploits,Scott Simms Liberal/Libéral,"Bishop's Falls, N.L./T.-N.-L.",Broadcaster/Radiodiffuseur,15970,48.20,2184.0,6.6,bonavista exploits,
5,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Exploits,Rex Barnes ** Conservative/conservateur,"Grand Falls-Windsor, N.L./T.-N.-L.",Parliamentarian/Parlementaire,13786,41.61,,,bonavista exploits,
6,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Exploits,Samuel Robert McLean N.D.P./N.P.D.,"Gander, N.L./T.-N.-L.",Teacher/Enseignant,2667,8.05,,,bonavista exploits,
7,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Exploits,Ed Sailor White Green Party/Parti Vert,"St. John's, N.L./T.-N.-L.",Former WWF Wrestler/Ancien lutteur de la WWF,367,1.11,,,bonavista exploits,
8,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Exploits,John Lannon Independent/Indépendant,"Gander, N.L./T.-N.-L.",Self-employed/Travailleur autonome,344,1.04,,,bonavista exploits,
...,...,...,...,...,...,...,...,...,...,...,...
1665,British Columbia/Colombie-Britannique,West Vancouver--Sunshine Coast,Blair Wilson Liberal/Libéral,"West Vancouver, BC/C.-B.",Chartered Accountant/Comptable agréé,19685,32.51,,,west vancouver sunshine coast,
1666,British Columbia/Colombie-Britannique,West Vancouver--Sunshine Coast,Nicholas Simons N.D.P./N.P.D.,"Gibsons, BC/C.-B.",Consultant/Consultant,13156,21.73,,,west vancouver sunshine coast,
1667,British Columbia/Colombie-Britannique,West Vancouver--Sunshine Coast,Andrea Goldsmith Green Party/Parti Vert,"Gibsons, BC/C.-B.",Town Councillor/Conseillère municipale,5887,9.72,,,west vancouver sunshine coast,
1668,British Columbia/Colombie-Britannique,West Vancouver--Sunshine Coast,Marc Bombois Canadian Action/Action canadienne,"Squamish, BC/C.-B.",General Contractor/Entrepreneur général,321,0.53,,,west vancouver sunshine coast,


In [347]:
# Manually assigned district codes for the 38th-election districts that the
# name-based lookup above could not match. Keys are the raw values of
# 'Electoral District/Circonscription'; the trailing comments record the
# district name the code was taken from where it differs from the key.
missing_codes_map = {
    'Bonavista--Exploits': 10002,
    "St. John's North/St. John's-Nord": 10006,
    "St. John's South/St. John's-Sud": 10007,
    "North Nova/Nova-Nord": 12007,  # was renamed post-election to Cumberland--Colchester--Musquodoboit Valley
    "North Okanagan--Shuswap/Okanagan-Nord--Shuswap": 59018,
    "West Vancouver--Sunshine Coast": 59036,
    "Southern Interior": 59026,
    "Fundy": 13004,  # Fundy Royal
    "St. Croix--Belleisle": 13008,  # Renamed to New Brunswick Southwest
    "Argenteuil--Mirabel": 24004,  # Argenteuil--Papineau--Mirabel
    "Beauport": 24007,  # Beauport--Limoilou
    "Charlesbourg": 24013,  # Charlesbourg--Haute-Saint-Charles
    "Charlevoix--Montmorency": 24014,  # Montmorency--Charlevoix--Haute-Côte-Nord
    "Laurier": 24031,  # Laurier--Sainte-Marie
    "Longueuil": 24035,  # Longueuil--Pierre-Boucher
    "Matapédia--Matane": 24041,  # Haute-Gaspésie--La Mitis--Matane--Matapédia
    "Nunavik--Eeyou": 24046,  # Abitibi--Baie-James--Nunavik--Eeyou
    "Portneuf": 24051,  # Portneuf--Jacques-Cartier
    "Richelieu": 24054,  # Bas-Richelieu--Nicolet--Bécancour
    "Rimouski--Témiscouata": 24056,  # Rimouski-Neigette--Témiscouata--Les Basques
    "Rivière-du-Loup--Montmagny": 24058,  # Montmagny--L'Islet--Kamouraska--Rivière-du-Loup
    "Roberval": 24060,  # Roberval--Lac-Saint-Jean
    "Carleton--Lanark": 35012,  # Carleton--Mississippi Mills
    "Clarington--Scugog--Uxbridge": 35014,  # Renamed to Durham
    "Grey--Bruce--Owen Sound": 35026,  # Bruce--Grey--Owen Sound
    "Middlesex--Kent--Lambton": 35046,  # Lambton--Kent--Middlesex
    "Charleswood--St. James": 46002,  # Charleswood--St. James--Assiniboia
    "Dauphin--Swan River": 46004,  # Dauphin--Swan River--Marquette
    "Churchill River/Rivière Churchill": 47003,  # Desnethé--Missinippi--Churchill River
    "Athabasca": 48001,  # Fort McMurray--Athabasca
    "Calgary North Centre/Calgary-Centre-Nord": 48003,  # Calgary Centre-North/Calgary-Centre-Nord
    "Calgary South Centre/Calgary-Centre-Sud": 48006,  # Renamed to Calgary Centre
    "Edmonton--Beaumont": 48011,  # Edmonton--Mill Woods--Beaumont
    "Dewdney--Alouette": 59007,  # Renamed to Pitt Meadows--Maple Ridge--Mission
    "Kamloops--Thompson": 59010,  # Kamloops--Thompson--Cariboo
    "Kelowna": 59011,  # Kelowna--Lake Country
}

# Fill only the rows that still lack a code; rows that already have an ED_CODE
# are left untouched. A district name absent from the map stays missing.
mask = election_outcomes_38_df['ED_CODE'].isna()
election_outcomes_38_df.loc[mask, 'ED_CODE'] = election_outcomes_38_df.loc[mask, 'Electoral District/Circonscription'].map(missing_codes_map)

# Re-run the missing-code check; an empty frame means every district now has a code
election_outcomes_38_df[election_outcomes_38_df['ED_CODE'].isna()]

# Debugging helper: uncomment to list the distinct district names that are still missing a code
#election_outcomes_38_df[election_outcomes_38_df['ED_CODE'].isna()]['Electoral District/Circonscription'].unique()

Unnamed: 0,Province,Electoral District/Circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité,cleaned_district_name,ED_CODE


In [348]:
# Prepare the 38th election results for merging: apply the shared column
# clean-up, make ED_CODE an integer, and extract the candidate's name from
# the combined candidate/party label.
election_outcomes_38_df = add_drop_cols(election_outcomes_38_df).astype({'ED_CODE': int})
election_outcomes_38_df['Candidate Name'] = election_outcomes_38_df['Candidate/Candidat'].apply(
    lambda candidate_label: get_candidate_names(candidate_label, parties_list)
)

# Rows for which no candidate name could be extracted
election_outcomes_38_df.loc[election_outcomes_38_df['Candidate Name'].isna()]

Unnamed: 0,Electoral District/Circonscription,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,cleaned_district_name,ED_CODE,won_election,Candidate Name


In [349]:
# Break the candidate name into the first/last name columns used as merge keys
election_outcomes_38_df[['Recipient first name', 'Recipient last name']] = (
    election_outcomes_38_df['Candidate Name'].apply(split_name)
)

# Rows where the split produced no last name
election_outcomes_38_df.loc[election_outcomes_38_df['Recipient last name'].isna()]

Unnamed: 0,Electoral District/Circonscription,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,cleaned_district_name,ED_CODE,won_election,Candidate Name,Recipient first name,Recipient last name


In [352]:
# Join the 38th election contributions to their results, save the merged
# table to disk, and display it
election_38_df = merge_elections(
    contribution_df=election_contributions_38_df,
    outcome_df=election_outcomes_38_df,
)
election_38_df.to_csv(path_or_buf='data/merged/38_general_election.csv')
election_38_df

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount,cleaned_district_name_x,ED_CODE,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election
0,﻿Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Magog,QC,J1X 2C3,,400.0,sherbrooke,24070.0,31.01,False
1,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Sherbrooke,QC,J1L 2B6,,500.0,sherbrooke,24070.0,31.01,False
2,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Ascot,QC,J1K 3B4,,500.0,sherbrooke,24070.0,31.01,False
3,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Shebrooke,QC,J1H 4J9,,2500.0,sherbrooke,24070.0,31.01,False
4,Candidates,4716,"Béchard, Bruno-Marie",Béchard,Bruno-Marie,Liberal Party of Canada,Sherbrooke,38th general election,2004-06-28,Katevale,QC,J0B 1W0,,500.0,sherbrooke,24070.0,31.01,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18501,Candidates,5250,"Carter, Roger",Carter,Roger,Marxist-Leninist Party of Canada,Beaches--East York,38th general election,2004-06-28,,,,,0.0,beaches east york,35005.0,0.10,False
18502,Candidates,5615,"Giguère, Philippe",Giguère,Philippe,New Democratic Party,Beauce,38th general election,2004-06-28,,,,,75.0,beauce,24005.0,3.05,False
18503,Candidates,5141,"Morrison, Dean",Morrison,Dean,New Democratic Party,Langley,38th general election,2004-06-28,,,,,0.0,langley,59013.0,16.75,False
18504,Candidates,4549,"Paradis, Denis",Paradis,Denis,Liberal Party of Canada,Brome--Missisquoi,38th general election,2004-06-28,,,,,0.0,brome missisquoi,24010.0,42.08,True


#### 39th Election

In [424]:
# Load the two inputs for the 39th general election: the contribution records
# and the results table (the results file is read as Latin-1).
election_contributions_39_df = pd.read_csv(
    'data/contributions/general_elections/39th general election.csv'
)
election_outcomes_39_df = pd.read_csv(
    'data/stats_can_general_elections/39_general_election_12.csv',
    encoding='latin-1',
)

In [426]:
# The 39th election results have no district-number column, so the ED_CODE is
# looked up by normalised district name. The left join keeps every result row;
# districts without a match end up with a missing ED_CODE.
election_outcomes_39_df['cleaned_district_name'] = (
    election_outcomes_39_df['Electoral District/Circonscription'].apply(clean_text)
)
election_outcomes_39_df = pd.merge(
    election_outcomes_39_df,
    earlier_district_codes_df[['cleaned_district_name', 'ED_CODE']],
    on='cleaned_district_name',
    how='left',
)

election_outcomes_39_df

Unnamed: 0,Province,Electoral District/Circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité,cleaned_district_name,ED_CODE
0,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,Fabian Manning Conservative/Conservateur,"St. Bride's, N.L./T.-N.-L.",Parliamentarian/Parlementaire,19132,51.55,4814.0,13.0,avalon,10001
1,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,Bill Morrow Liberal/Libéral,"Bay Roberts, N.L./T.-N.-L.",Lawyer/Avocat,14318,38.58,,,avalon,10001
2,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,Eugene Conway N.D.P./N.P.D.,"Conception Harbour, N.L./T.-N.-L.",Business Owner/Propriétaire d'entreprise,3365,9.07,,,avalon,10001
3,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Avalon,Shannon Hillier Green Party/Parti Vert,"St. John's, N.L./T.-N.-L.",Provincial Organizer/Organisatrice provinciale,297,0.80,,,avalon,10001
4,Newfoundland and Labrador/Terre-Neuve-et-Labrador,Bonavista--Gander--Grand Falls--Windsor,Scott Simms ** Liberal/Libéral,"Bishop's Falls, N.L./T.-N.-L.",Parliamentarian/Parlementaire,19866,52.04,4490.0,11.8,bonavista gander grand falls windsor,10002
...,...,...,...,...,...,...,...,...,...,...,...
1629,Nunavut,Nunavut,Nancy Karetak-Lindell ** Liberal/Libéral,"Arviat, Nun./Nt",Parliamentarian/Parlementaire,3673,39.98,1003.0,10.9,nunavut,62001
1630,Nunavut,Nunavut,David Aglukark Sr. Conservative/Conservateur,"Arviat, Nun./Nt",Self-employed/Travailleur indépendant,2670,29.06,,,nunavut,62001
1631,Nunavut,Nunavut,Bill Riddell N.D.P./N.P.D.,"Iqaluit, Nun./Nt",Consultant/Consultant,1576,17.15,,,nunavut,62001
1632,Nunavut,Nunavut,D. Ed deVries Marijuana Party/Parti Marijuana,"Iqaluit, Nun./Nt",Traditional Healer/Guérisseur traditionnel,724,7.88,,,nunavut,62001


In [428]:
# List the result rows whose district name found no ED_CODE in the lookup
election_outcomes_39_df.loc[election_outcomes_39_df['ED_CODE'].isna()]

Unnamed: 0,Province,Electoral District/Circonscription,Candidate/Candidat,Candidate Residence/Résidence du candidat,Candidate Occupation/Profession du candidat,Votes Obtained/Votes obtenus,Percentage of Votes Obtained /Pourcentage des votes obtenus,Majority/Majorité,Majority Percentage/Pourcentage de majorité,cleaned_district_name,ED_CODE


In [429]:
# Prepare the 39th election results for merging: apply the shared column
# clean-up, make ED_CODE an integer, and extract the candidate's name from
# the combined candidate/party label.
election_outcomes_39_df = add_drop_cols(election_outcomes_39_df).astype({'ED_CODE': int})
election_outcomes_39_df['Candidate Name'] = election_outcomes_39_df['Candidate/Candidat'].apply(
    lambda candidate_label: get_candidate_names(candidate_label, parties_list)
)

# Rows for which no candidate name could be extracted
election_outcomes_39_df.loc[election_outcomes_39_df['Candidate Name'].isna()]

Unnamed: 0,Electoral District/Circonscription,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,cleaned_district_name,ED_CODE,won_election,Candidate Name


In [430]:
# Break the candidate name into the first/last name columns used as merge keys
election_outcomes_39_df[['Recipient first name', 'Recipient last name']] = (
    election_outcomes_39_df['Candidate Name'].apply(split_name)
)

# Rows where the split produced no last name
election_outcomes_39_df.loc[election_outcomes_39_df['Recipient last name'].isna()]

Unnamed: 0,Electoral District/Circonscription,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,cleaned_district_name,ED_CODE,won_election,Candidate Name,Recipient first name,Recipient last name


In [431]:
# Join the 39th election contributions to their results, save the merged
# table to disk, and display it
election_39_df = merge_elections(
    contribution_df=election_contributions_39_df,
    outcome_df=election_outcomes_39_df,
)
election_39_df.to_csv(path_or_buf='data/merged/39_general_election.csv')
election_39_df

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount,cleaned_district_name_x,ED_CODE,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election
0,Candidates,8865,"Watson, Jeff",Watson,Jeff,Conservative Party of Canada,Essex,39th general election,2006-01-23,Westmount,QC,H3Y 1E9,2006-01-20,500.0,essex,35021.0,40.40,True
1,Candidates,8865,"Watson, Jeff",Watson,Jeff,Conservative Party of Canada,Essex,39th general election,2006-01-23,Toronto,ON,M4T 2S3,2006-01-20,500.0,essex,35021.0,40.40,True
2,Candidates,8865,"Watson, Jeff",Watson,Jeff,Conservative Party of Canada,Essex,39th general election,2006-01-23,Toronto,ON,M5P 2P8,2006-01-20,250.0,essex,35021.0,40.40,True
3,Candidates,8865,"Watson, Jeff",Watson,Jeff,Conservative Party of Canada,Essex,39th general election,2006-01-23,Toronto,ON,M5N 1R5,2006-01-20,1000.0,essex,35021.0,40.40,True
4,Candidates,8865,"Watson, Jeff",Watson,Jeff,Conservative Party of Canada,Essex,39th general election,2006-01-23,Toronto,ON,M3H 5S4,2006-01-20,750.0,essex,35021.0,40.40,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21577,Candidates,8707,"Fornssler, Mike",Fornssler,Mike,Green Party of Canada,Blackstrap,39th general election,2006-01-23,,,,,0.0,blackstrap,47002.0,3.29,False
21578,Candidates,9140,"Maddin, Jim",Maddin,Jim,New Democratic Party,Saskatoon--Wanuskewin,39th general election,2006-01-23,,,,,0.0,saskatoon wanuskewin,47011.0,22.09,False
21579,Candidates,9106,"Maundcote, Paula",Maundcote,Paula,New Democratic Party,Shefford,39th general election,2006-01-23,,,,,0.0,shefford,24069.0,4.73,False
21580,Candidates,8078,"Cardin, Serge",Cardin,Serge,Bloc Québécois,Sherbrooke,39th general election,2006-01-23,,,,,0.0,sherbrooke,24070.0,52.20,True


#### 40th Election

In [419]:
# Load the two inputs for the 40th general election: the contribution records
# and the results table (the results file is read as Latin-1).
election_contributions_40_df = pd.read_csv(
    'data/contributions/general_elections/40th general election.csv'
)
election_outcomes_40_df = pd.read_csv(
    'data/stats_can_general_elections/40_general_election_12.csv',
    encoding='latin-1',
)

In [420]:
# Prepare the 40th election results for merging.
# This file already has a district-number column; rename it to the shared
# 'ED_CODE' key before the common clean-up.
election_outcomes_40_df = election_outcomes_40_df.rename(
    columns={"Electoral District Number/Numéro de circonscription": "ED_CODE"}
)
election_outcomes_40_df = add_drop_cols(election_outcomes_40_df).astype({'ED_CODE': int})
election_outcomes_40_df['Candidate Name'] = election_outcomes_40_df['Candidate/Candidat'].apply(
    lambda candidate_label: get_candidate_names(candidate_label, parties_list)
)

# Rows for which no candidate name could be extracted
election_outcomes_40_df.loc[election_outcomes_40_df['Candidate Name'].isna()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name


In [422]:
# Break the candidate name into the first/last name columns used as merge keys
election_outcomes_40_df[['Recipient first name', 'Recipient last name']] = (
    election_outcomes_40_df['Candidate Name'].apply(split_name)
)

# Rows where the split produced no last name
election_outcomes_40_df.loc[election_outcomes_40_df['Recipient last name'].isna()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name,Recipient first name,Recipient last name


In [423]:
# Join the 40th election contributions to their results, save the merged
# table to disk, and display it
election_40_df = merge_elections(
    contribution_df=election_contributions_40_df,
    outcome_df=election_outcomes_40_df,
)
election_40_df.to_csv(path_or_buf='data/merged/40_general_election.csv')
election_40_df

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount,cleaned_district_name,ED_CODE,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election
0,Candidates,15343,"McQuail, Tony",McQuail,Tony,New Democratic Party,Huron--Bruce,40th general election,2008-10-14,Clinton,ON,NOM 1L0,2008-10-03,400.0,huron bruce,35034.0,14.99,False
1,Candidates,15343,"McQuail, Tony",McQuail,Tony,New Democratic Party,Huron--Bruce,40th general election,2008-10-14,Goderich,ON,N7A 3Y1,2008-10-03,300.0,huron bruce,35034.0,14.99,False
2,Candidates,15343,"McQuail, Tony",McQuail,Tony,New Democratic Party,Huron--Bruce,40th general election,2008-10-14,Bayfield,ON,N0M 1G0,2008-10-14,400.0,huron bruce,35034.0,14.99,False
3,Candidates,15343,"McQuail, Tony",McQuail,Tony,New Democratic Party,Huron--Bruce,40th general election,2008-10-14,Kincardine,ON,N2Z 2X3,2008-10-14,250.0,huron bruce,35034.0,14.99,False
4,Candidates,15343,"McQuail, Tony",McQuail,Tony,New Democratic Party,Huron--Bruce,40th general election,2008-10-14,Lucknow,ON,N0G 2H0,2008-10-14,500.0,huron bruce,35034.0,14.99,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14349,Candidates,16483,"Côrriveau, Sébastien",Côrriveau,Sébastien,Parti Rhinocéros Party,Sherbrooke,40th general election,2008-10-14,,,,,180.0,sherbrooke,24070.0,0.92,False
14350,Candidates,16592,"Stone, Ray",Stone,Ray,New Democratic Party,Vegreville--Wainwright,40th general election,2008-10-14,,,,,126.0,vegreville wainwright,48024.0,9.45,False
14351,Candidates,16556,"Aitchison, Betty",Aitchison,Betty,Liberal Party of Canada,Kootenay--Columbia,40th general election,2008-10-14,,,,,400.0,kootenay columbia,59012.0,7.75,False
14352,Candidates,15096,"Komlosy, André",Komlosy,André,Conservative Party of Canada,Drummond,40th general election,2008-10-14,,,,,0.0,drummond,24018.0,25.31,False


#### 41st Election

In [412]:
# Load the two inputs for the 41st general election: the contribution records
# and the results table (the results file is read as Latin-1).
election_contributions_41_df = pd.read_csv(
    'data/contributions/general_elections/41st general election.csv'
)
election_outcomes_41_df = pd.read_csv(
    'data/stats_can_general_elections/41_general_election_12.csv',
    encoding='latin-1',
)

In [413]:
# Prepare the 41st election results for merging.
# This file already has a district-number column; rename it to the shared
# 'ED_CODE' key before the common clean-up.
election_outcomes_41_df = election_outcomes_41_df.rename(
    columns={"Electoral District Number/Numéro de circonscription": "ED_CODE"}
)
election_outcomes_41_df = add_drop_cols(election_outcomes_41_df).astype({'ED_CODE': int})
election_outcomes_41_df['Candidate Name'] = election_outcomes_41_df['Candidate/Candidat'].apply(
    lambda candidate_label: get_candidate_names(candidate_label, parties_list)
)

# Rows for which no candidate name could be extracted
election_outcomes_41_df.loc[election_outcomes_41_df['Candidate Name'].isna()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name


In [414]:
# Break the candidate name into the first/last name columns used as merge keys
election_outcomes_41_df[['Recipient first name', 'Recipient last name']] = (
    election_outcomes_41_df['Candidate Name'].apply(split_name)
)

# Rows where the split produced no last name
election_outcomes_41_df.loc[election_outcomes_41_df['Recipient last name'].isna()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name,Recipient first name,Recipient last name


In [415]:
# Join the 41st election contributions to their results, save the merged
# table to disk, and display it
election_41_df = merge_elections(
    contribution_df=election_contributions_41_df,
    outcome_df=election_outcomes_41_df,
)
election_41_df.to_csv(path_or_buf='data/merged/41_general_election.csv')
election_41_df

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount,cleaned_district_name,ED_CODE,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election
0,Candidates,21677,"Easter, Wayne",Easter,Wayne,Liberal Party of Canada,Malpeque,41st general election,2011-05-02,Cornwall,PE,C0A1H0,2011-04-04,1100.0,malpeque,11004.0,42.4,True
1,Candidates,21677,"Easter, Wayne",Easter,Wayne,Liberal Party of Canada,Malpeque,41st general election,2011-05-02,North Wiltshire,PE,C0A1Y0,2011-04-12,400.0,malpeque,11004.0,42.4,True
2,Candidates,21677,"Easter, Wayne",Easter,Wayne,Liberal Party of Canada,Malpeque,41st general election,2011-05-02,North Wiltshire,PE,C0A1Y0,2011-04-12,400.0,malpeque,11004.0,42.4,True
3,Candidates,21677,"Easter, Wayne",Easter,Wayne,Liberal Party of Canada,Malpeque,41st general election,2011-05-02,Charlottetown,PE,C1A7J7,2011-04-29,500.0,malpeque,11004.0,42.4,True
4,Candidates,21677,"Easter, Wayne",Easter,Wayne,Liberal Party of Canada,Malpeque,41st general election,2011-05-02,Charlottetown,PE,C1A8C3,2011-04-14,300.0,malpeque,11004.0,42.4,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12474,Candidates,22819,"Barney, Rachel",Barney,Rachel,Green Party of Canada,Trinity--Spadina,41st general election,2011-05-02,,,,,550.0,trinity spadina,35095.0,4.4,False
12475,Candidates,22791,"Lescarbeau, Nicholas",Lescarbeau,Nicholas,Green Party of Canada,Chambly--Borduas,41st general election,2011-05-02,,,,,10.0,chambly borduas,24012.0,1.5,False
12476,Candidates,22634,"Belanger, Matthew",Belanger,Matthew,Libertarian Party of Canada,Oshawa,41st general election,2011-05-02,,,,,0.0,oshawa,35061.0,0.5,False
12477,Candidates,23019,"Schellenberg, Konrad",Schellenberg,Konrad,Green Party of Canada,Crowfoot,41st general election,2011-05-02,,,,,0.0,crowfoot,48010.0,3.3,False


#### 42nd Election

In [393]:
# Load the two inputs for the 42nd general election: the contribution records
# and the results table (both read with pandas' default encoding).
election_contributions_42_df = pd.read_csv(
    'data/contributions/general_elections/42nd general election.csv'
)
election_outcomes_42_df = pd.read_csv(
    'data/stats_can_general_elections/42_general_election_12.csv'
)

In [394]:
# Prepare the 42nd election results for merging.
# This file already has a district-number column; rename it to the shared
# 'ED_CODE' key before the common clean-up.
election_outcomes_42_df = election_outcomes_42_df.rename(
    columns={"Electoral District Number/Numéro de circonscription": "ED_CODE"}
)
election_outcomes_42_df = add_drop_cols(election_outcomes_42_df).astype({'ED_CODE': int})
election_outcomes_42_df['Candidate Name'] = election_outcomes_42_df['Candidate/Candidat'].apply(
    lambda candidate_label: get_candidate_names(candidate_label, parties_list)
)

# Rows for which no candidate name could be extracted
election_outcomes_42_df.loc[election_outcomes_42_df['Candidate Name'].isna()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name


In [395]:
# Break the candidate name into the first/last name columns used as merge keys
election_outcomes_42_df[['Recipient first name', 'Recipient last name']] = (
    election_outcomes_42_df['Candidate Name'].apply(split_name)
)

# Rows where the split produced no last name
election_outcomes_42_df.loc[election_outcomes_42_df['Recipient last name'].isna()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name,Recipient first name,Recipient last name
5,Avalon,10001,Jennifer McCreath Forces et Démocratie - Allie...,0.2,False,Jennifer,Jennifer,
174,Avignon--La Mitis--Matane--Matapédia,24006,Jean-François Fortin ** Forces et Démocratie -...,11.6,False,Jean-François,Jean-François,
188,Beauport--Côte-de-Beaupré--Île d'Orléans--Char...,24020,Mario Desjardins Pelchat Forces et Démocratie ...,0.4,False,Mario,Mario,
196,Beauport--Limoilou,24008,Bladimir Laborit Forces et Démocratie - Allier...,0.2,False,Bladimir,Bladimir,
225,Bourassa,24015,Jean-Marie Floriant Ndzana Forces et Démocrati...,0.2,False,Jean-Marie,Jean-Marie,
231,Brome--Missisquoi,24016,Patrick Paine Forces et Démocratie - Allier le...,0.3,False,Patrick,Patrick,
296,Honoré-Mercier,24029,Dayana Dejean Forces et Démocratie - Allier le...,0.3,False,Dayana,Dayana,
311,Joliette,24031,Robert D. Morais Forces et Démocratie - Allier...,0.4,False,Robert,Robert,
324,La Pointe-de-l'Île,24033,Jean-François Larose Forces et Démocratie - Al...,0.2,False,Jean-François,Jean-François,
385,Longueuil--Saint-Hubert,24043,Affine Lwalalika Forces et Démocratie - Allier...,0.3,False,Affine,Affine,


In [397]:
# Manual last names for the 42nd-election candidates whose automatic name
# extraction above yielded only a first name. Keys are the full raw
# 'Candidate/Candidat' strings (name, optional '**' marker, party label).
# NOTE(review): presumably these party labels are not covered by parties_list,
# which is why the extraction fell short — verify.
# NOTE(review): the values are used as the 'Recipient last name' merge key, so
# they must match the contribution data exactly; confirm multi-word names such
# as "Mario Desjardins Pelchat" or "Louis Clément Sénat" against that data.
missing_last_name_map = {
    "Jennifer McCreath Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "McCreath",
    "Jean-François Fortin ** Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Fortin",
    "Mario Desjardins Pelchat Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Pelchat",
    "Bladimir Laborit Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Laborit",
    "Jean-Marie Floriant Ndzana Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Ndzana",
    "Patrick Paine Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Paine",
    "Dayana Dejean Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Dejean",
    "Robert D. Morais Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Morais",
    "Jean-François Larose Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Larose",
    "Affine Lwalalika Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Lwalalika",
    "Manon Perreault ** Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Perreault",
    "Pascal Médieu Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Médieu",
    "Danielle Provost Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Provost",
    "Johnathan Cloutier Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Cloutier",
    "Patricia Domingos Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Domingos",
    "Louis Clément Sénat Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Sénat",
    "Toban Leckie Forces et Démocratie - Allier les forces de nos régions/Forces et Démocratie - Allier les forces de nos régions": "Leckie",
    "Raj Gupta PC Party/Parti PC": "Gupta"
}

# Fill only the rows that still lack a last name; a candidate string absent
# from the map stays missing and will show up in the check below.
mask = election_outcomes_42_df['Recipient last name'].isna()
election_outcomes_42_df.loc[mask, 'Recipient last name'] = election_outcomes_42_df.loc[mask, 'Candidate/Candidat'].map(missing_last_name_map)

# Re-run the missing-last-name check; an empty frame means every row is filled
election_outcomes_42_df[election_outcomes_42_df['Recipient last name'].isna()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name,Recipient first name,Recipient last name


In [398]:
# Join the 42nd election contributions to their results, save the merged
# table to disk, and display it
election_42_df = merge_elections(
    contribution_df=election_contributions_42_df,
    outcome_df=election_outcomes_42_df,
)
election_42_df.to_csv(path_or_buf='data/merged/42_general_election.csv')
election_42_df

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount,cleaned_district_name,ED_CODE,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election
0,﻿Candidates,30603,"Begg, Garry",Begg,Garry,New Democratic Party,Fleetwood--Port Kells,42nd general election,2015-10-19,Surrey,BC,V3S 6A3,2015-10-02,500.00,fleetwood port kells,59012,21.5,False
1,Candidates,30603,"Begg, Garry",Begg,Garry,New Democratic Party,Fleetwood--Port Kells,42nd general election,2015-10-19,Surrey,BC,V3S2Z6,2015-10-09,500.00,fleetwood port kells,59012,21.5,False
2,Candidates,30603,"Begg, Garry",Begg,Garry,New Democratic Party,Fleetwood--Port Kells,42nd general election,2015-10-19,Surrey,BC,V3S 0T9,2015-10-18,500.00,fleetwood port kells,59012,21.5,False
3,Candidates,30603,"Begg, Garry",Begg,Garry,New Democratic Party,Fleetwood--Port Kells,42nd general election,2015-10-19,Surrey,BC,V4N 5L7,2015-10-09,500.00,fleetwood port kells,59012,21.5,False
4,Candidates,30603,"Begg, Garry",Begg,Garry,New Democratic Party,Fleetwood--Port Kells,42nd general election,2015-10-19,Surrey,BC,V3S2J2,2015-10-05,500.00,fleetwood port kells,59012,21.5,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18496,Candidates,30537,"Albas, Dan",Albas,Dan,Conservative Party of Canada,Central Okanagan--Similkameen--Nicola,42nd general election,2015-10-19,,,,,0.00,central okanagan similkameen nicola,59005,39.6,True
18497,Candidates,30399,"Quimpo, Jojo",Quimpo,Jojo,Conservative Party of Canada,Vancouver Kingsway,42nd general election,2015-10-19,,,,,731.83,vancouver kingsway,59038,21.0,False
18498,Candidates,32532,"Gaudreau, Daniel",Gaudreau,Daniel,Conservative Party of Canada,Laurier--Sainte-Marie,42nd general election,2015-10-19,,,,,160.00,laurier sainte marie,24039,4.1,False
18499,Candidates,30463,"Catellier, François",Catellier,François,Conservative Party of Canada,Saint Boniface--Saint Vital,42nd general election,2015-10-19,,,,,200.00,saint boniface saint vital,46009,28.7,False


#### 43rd Election

In [None]:
# Load the two inputs for the 43rd general election:
# the contributions extract and the election outcomes table.
election_contributions_43_df = pd.read_csv(
    filepath_or_buffer='data/contributions/general_elections/43rd general election.csv'
)
election_outcomes_43_df = pd.read_csv(
    filepath_or_buffer='data/stats_can_general_elections/43_general_election_12.csv'
)

In [None]:
# preprocess the outcomes
# Rename the bilingual district-number column to the short key "ED_CODE".
# The renamed frame is rebound rather than using inplace=True, which pandas
# discourages and which offers no benefit here.
election_outcomes_43_df = election_outcomes_43_df.rename(
    columns={"Electoral District Number/Numéro de circonscription": "ED_CODE"}
)
# add_drop_cols is defined elsewhere in the notebook; judging by the output
# below it leaves a won_election column and a reduced column set (TODO confirm).
election_outcomes_43_df = add_drop_cols(election_outcomes_43_df)
# Cast the district code to int (raises if any code is missing or non-numeric).
election_outcomes_43_df['ED_CODE'] = election_outcomes_43_df['ED_CODE'].astype(int)
# Derive the candidate's name from the raw 'Candidate/Candidat' string using
# the known party labels in parties_list.
election_outcomes_43_df['Candidate Name'] = election_outcomes_43_df['Candidate/Candidat'].apply(
    lambda x: get_candidate_names(x, parties_list)
)

# check for missing candidates
election_outcomes_43_df[election_outcomes_43_df['Candidate Name'].isna()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name


In [None]:
# Apply split_name to every candidate name and store its two-part result
# in the first-name / last-name columns.
election_outcomes_43_df[['Recipient first name', 'Recipient last name']] = (
    election_outcomes_43_df['Candidate Name'].apply(split_name)
)

# list the rows where no last name could be extracted
election_outcomes_43_df.loc[election_outcomes_43_df['Recipient last name'].isnull()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name,Recipient first name,Recipient last name
992,Humber River--Black Creek,35121,Stenneth Smith UPC/UPC,0.3,False,Stenneth,Stenneth,
1116,Mississauga--Lakeshore,35061,Carlton Darby UPC/UPC,0.2,False,Carlton,Carlton,
1122,Mississauga--Malton,35062,Prudence Buchanan UPC/UPC,0.6,False,Prudence,Prudence,
1318,Scarborough North/Scarborough-Nord,35096,Janet Robinson UPC/UPC,0.2,False,Janet,Janet,
2122,Victoria,59041,Robert Duncan Communist/Communiste,0.2,False,Robert,Robert,


In [None]:
# Manual overrides for candidates whose last name was not extracted,
# keyed by the raw 'Candidate/Candidat' string.
missing_last_name_map = {
    "Stenneth Smith UPC/UPC": "Smith",
    "Carlton Darby UPC/UPC": "Darby",
    "Prudence Buchanan UPC/UPC": "Buchanan",
    "Janet Robinson UPC/UPC": "Robinson",
    "Robert Duncan Communist/Communiste": "Duncan",
}

# Only rows with a missing last name are touched; existing values stay as-is.
mask = election_outcomes_43_df['Recipient last name'].isnull()
recovered_last_names_43 = election_outcomes_43_df.loc[mask, 'Candidate/Candidat'].map(missing_last_name_map)
election_outcomes_43_df.loc[mask, 'Recipient last name'] = recovered_last_names_43

# check again: rows still listed here were not resolved by the map
election_outcomes_43_df.loc[election_outcomes_43_df['Recipient last name'].isnull()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name,Recipient first name,Recipient last name


In [None]:
# Combine the 43rd-election contributions with the candidate outcomes via
# merge_elections, save the merged table, then show it as the cell output.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; pass index=False if downstream readers do not expect that column.
election_43_df = merge_elections(
    election_contributions_43_df,
    election_outcomes_43_df,
)
election_43_df.to_csv(path_or_buf='data/merged/43_general_election.csv')
election_43_df

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount,cleaned_district_name,ED_CODE,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election
0,Candidates,42102,"Pupatello, Sandra",Pupatello,Sandra,Liberal Party of Canada,Windsor West,43rd general election,2019-10-21,WINDSOR,ON,N9E4P5,2019-10-04,1600.0,windsor west,35117,36.3,False
1,Candidates,42102,"Pupatello, Sandra",Pupatello,Sandra,Liberal Party of Canada,Windsor West,43rd general election,2019-10-21,TECUMSEH,ON,N8N5H1,2019-09-27,500.0,windsor west,35117,36.3,False
2,Candidates,42102,"Pupatello, Sandra",Pupatello,Sandra,Liberal Party of Canada,Windsor West,43rd general election,2019-10-21,BELLERIVER,ON,N0R1A0,2019-10-11,1600.0,windsor west,35117,36.3,False
3,Candidates,42102,"Pupatello, Sandra",Pupatello,Sandra,Liberal Party of Canada,Windsor West,43rd general election,2019-10-21,WINDSOR,ON,N9E4P5,2019-10-04,1600.0,windsor west,35117,36.3,False
4,Candidates,42102,"Pupatello, Sandra",Pupatello,Sandra,Liberal Party of Canada,Windsor West,43rd general election,2019-10-21,WINDSOR,ON,N9E1W1,2019-09-27,500.0,windsor west,35117,36.3,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
541,Candidates,41662,"Tordjman, David",Tordjman,David,Conservative Party of Canada,Mount Royal,43rd general election,2019-10-21,Dollard-des-Ormeaux,QC,H9A3G2,2019-10-20,1200.0,mount royal,24052,24.9,False
542,Candidates,41662,"Tordjman, David",Tordjman,David,Conservative Party of Canada,Mount Royal,43rd general election,2019-10-21,,,,,200.0,mount royal,24052,24.9,False
543,Candidates,41905,"Veilleux, Sophie",Veilleux,Sophie,Conservative Party of Canada,Papineau,43rd general election,2019-10-21,,,,,0.0,papineau,24055,4.2,False
544,Candidates,41513,"Laplante, Hugues",Laplante,Hugues,Conservative Party of Canada,Châteauguay--Lacolle,43rd general election,2019-10-21,,,,,840.0,châteauguay lacolle,24021,11.2,False


#### 44th Election

In [357]:
# Load the two inputs for the 44th general election:
# the contributions extract and the election outcomes table.
election_contributions_44_df = pd.read_csv(
    filepath_or_buffer='data/contributions/general_elections/44th general election.csv'
)
election_outcomes_44_df = pd.read_csv(
    filepath_or_buffer='data/stats_can_general_elections/44_general_election_12.csv'
)

In [358]:
# preprocess the outcomes
# Rename the bilingual district-number column to the short key "ED_CODE".
# The renamed frame is rebound rather than using inplace=True, which pandas
# discourages and which offers no benefit here.
election_outcomes_44_df = election_outcomes_44_df.rename(
    columns={"Electoral District Number/Numéro de circonscription": "ED_CODE"}
)
# add_drop_cols is defined elsewhere in the notebook; judging by the output
# below it leaves a won_election column and a reduced column set (TODO confirm).
election_outcomes_44_df = add_drop_cols(election_outcomes_44_df)
# Cast the district code to int (raises if any code is missing or non-numeric).
election_outcomes_44_df['ED_CODE'] = election_outcomes_44_df['ED_CODE'].astype(int)
# Derive the candidate's name from the raw 'Candidate/Candidat' string using
# the known party labels in parties_list.
election_outcomes_44_df['Candidate Name'] = election_outcomes_44_df['Candidate/Candidat'].apply(
    lambda x: get_candidate_names(x, parties_list)
)

# check for missing candidates
election_outcomes_44_df[election_outcomes_44_df['Candidate Name'].isna()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name


In [365]:
# Apply split_name to every candidate name and store its two-part result
# in the first-name / last-name columns.
election_outcomes_44_df[['Recipient first name', 'Recipient last name']] = (
    election_outcomes_44_df['Candidate Name'].apply(split_name)
)

# list the rows where no last name could be extracted
election_outcomes_44_df.loc[election_outcomes_44_df['Recipient last name'].isnull()]

Unnamed: 0,Electoral District Name/Nom de circonscription,ED_CODE,Candidate/Candidat,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election,Candidate Name,Recipient first name,Recipient last name


In [370]:
# Combine the 44th-election contributions with the candidate outcomes via
# merge_elections, save the merged table, then show it as the cell output.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; pass index=False if downstream readers do not expect that column.
election_44_df = merge_elections(
    election_contributions_44_df,
    election_outcomes_44_df,
)
election_44_df.to_csv(path_or_buf='data/merged/44_general_election.csv')
election_44_df

Unnamed: 0,Political Entity,Recipient ID,Recipient,Recipient last name,Recipient first name,Political Party of Recipient,Electoral District,Electoral event,Fiscal/Election date,Contributor City,Contributor Province,Contributor Postal code,Contribution Received date,Monetary amount,cleaned_district_name,ED_CODE,Percentage of Votes Obtained /Pourcentage des votes obtenus,won_election
0,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,Membertou,NS,B1S 3K8,2021-09-08,300.0,sydney victoria,12010,19.9,False
1,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,Coxhealth,NS,B1R 1T8,2021-09-05,500.0,sydney victoria,12010,19.9,False
2,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,Membertou,NS,B1S 0K2,2021-09-15,300.0,sydney victoria,12010,19.9,False
3,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,North York,ON,M2M 2W2,2021-09-11,215.0,sydney victoria,12010,19.9,False
4,Candidates,47414,"Ward, Jeff",Ward,Jeff,New Democratic Party,Sydney--Victoria,44th general election,2021-09-20,Dutch Brook,NS,B1L 1E9,2021-09-19,400.0,sydney victoria,12010,19.9,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6824,Candidates,47098,"Richards, Blake",Richards,Blake,Conservative Party of Canada,Banff--Airdrie,44th general election,2021-09-20,,,,,0.0,banff airdrie,48001,56.7,True
6825,Candidates,47567,"Cook, Anthony",Cook,Anthony,People's Party of Canada,Vancouver South,44th general election,2021-09-20,,,,,20.0,vancouver south,59040,2.7,False
6826,Candidates,47402,"Wright, Michael",Wright,Michael,Green Party of Canada,Regina--Lewvan,44th general election,2021-09-20,,,,,30.0,regina lewvan,47007,1.2,False
6827,Candidates,47647,"Hickey, Jason",Hickey,Jason,Liberal Party of Canada,New Brunswick Southwest,44th general election,2021-09-20,,,,,20.0,new brunswick southwest,13008,23.9,False
