In [1]:
import pandas as pd

In [43]:
# Run parameters; both values are interpolated into the output file names
# written under ../data_output/Canada/ further down.
election = "federal"
year = 2021

In [27]:
# Province/territory names keyed by a two-character code. The location cell
# looks up the first two characters of each electoral district number here.
# Source: https://www.elections.ca/content.aspx?section=res&dir=cir/list&document=index338&lang=e
districts_2021 = dict(
    [
        ("10", "Newfoundland and Labrador"),
        ("11", "Prince Edward Island"),
        ("12", "Nova Scotia"),
        ("13", "New Brunswick"),
        ("24", "Quebec"),
        ("35", "Ontario"),
        ("46", "Manitoba"),
        ("47", "Saskatchewan"),
        ("48", "Alberta"),
        ("59", "British Columbia"),
        ("60", "Yukon"),
        ("61", "Northwest Territories"),
        ("62", "Nunavut"),
    ]
)

def create_polling_id(data, columns=None, sep="-", na_token="###"):
    """Build a composite string identifier by joining several columns row-wise.

    Each selected column has its missing values replaced by ``na_token`` and is
    converted to ``str``; the pieces are then concatenated in the given order
    with ``sep`` between them. ``data`` itself is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing every column named in ``columns``.
    columns : sequence of str
        Column names to join, in order. At least one is required.
    sep : str, default "-"
        Separator placed between consecutive pieces.
    na_token : str, default "###"
        Placeholder substituted for missing values before joining.

    Returns
    -------
    pandas.Series
        One identifier string per row, indexed like ``data``.

    Raises
    ------
    ValueError
        If ``columns`` is omitted or empty.
    KeyError
        If a name in ``columns`` is not a column of ``data``.
    """
    # ``None`` replaces the former mutable ``[]`` default; an empty selection
    # used to fail with an opaque IndexError on ``columns[0]``.
    columns = [] if columns is None else list(columns)
    if not columns:
        raise ValueError("create_polling_id requires at least one column name")

    # fillna/astype each return a new Series, so no defensive copy is needed.
    pieces = [data[column].fillna(na_token).astype(str) for column in columns]

    output = pieces[0]
    for piece in pieces[1:]:
        output = output + sep + piece
    return output

In [33]:
# Long bilingual headers in the raw CSV mapped to the short names used downstream.
# Note: "candidate" receives the English political-affiliation label, not a person's name.
column_aliases = {
    "Candidate Poll Votes Count/Votes du candidat pour le bureau": "value",
    "Electoral District Number/Numéro de circonscription": "district_id",
    "Political Affiliation Name_English/Appartenance politique_Anglais": "candidate",
    "Electoral District Name_English/Nom de circonscription_Anglais": "district",
    "Electoral District Name_French/Nom de circonscription_Français": "district_fr",
}

# Columns joined into polling_id: district number, station number, station name.
polling_key_columns = [
    "district_id",
    "Polling Station Number/Numéro du bureau de scrutin",
    "Polling Station Name/Nom du bureau de scrutin",
]

df = pd.read_csv("../data/Canada/2021_first_round.csv").rename(columns=column_aliases)
df["polling_id"] = create_polling_id(df, columns=polling_key_columns)
df.head()

Unnamed: 0,district_id,district,district_fr,Polling Station Number/Numéro du bureau de scrutin,Polling Station Name/Nom du bureau de scrutin,Void Poll Indicator/Indicateur de bureau supprimé,No Poll Held Indicator/Indicateur de bureau sans scrutin,Merge With/Fusionné avec,Rejected Ballots for Polling Station/Bulletins rejetés du bureau,Electors for Polling Station/Électeurs du bureau,Candidate’s Family Name/Nom de famille du candidat,Candidate’s Middle Name/Second prénom du candidat,Candidate’s First Name/Prénom du candidat,candidate,Political Affiliation Name_French/Appartenance politique_Français,Incumbent Indicator/Indicateur_Candidat sortant,Elected Candidate Indicator/Indicateur du candidat élu,value,polling_id
0,10001,Avalon,Avalon,1,Freshwater,N,N,,2,106,Chapman,,Matthew,Conservative,Conservateur,N,N,28,10001- 1-Freshwater
1,10001,Avalon,Avalon,1,Freshwater,N,N,,2,106,Davis,,Carolyn,NDP-New Democratic Party,NPD-Nouveau Parti démocratique,N,N,13,10001- 1-Freshwater
2,10001,Avalon,Avalon,1,Freshwater,N,N,,2,106,McDonald,,Ken,Liberal,Libéral,N,Y,55,10001- 1-Freshwater
3,10001,Avalon,Avalon,1,Freshwater,N,N,,2,106,Stewart,,Lainie,People's Party - PPC,Parti populaire - PPC,N,N,4,10001- 1-Freshwater
4,10001,Avalon,Avalon,2,Victoria,N,N,,0,330,Chapman,,Matthew,Conservative,Conservateur,N,N,56,10001- 2-Victoria


In [34]:
# Total votes per (polling station, "candidate") pair.
tt = df.groupby(["polling_id", "candidate"])["value"].sum().reset_index()

# Share of the polling station's total votes. transform("sum") broadcasts each
# station's total back onto its rows, so the division is vectorized instead of
# running a Python lambda once per station. Stations with zero total votes
# yield NaN (0/0), as before.
station_totals = tt.groupby("polling_id")["value"].transform("sum")
tt["rate"] = tt["value"] / station_totals

# Rank within each station, 1 = most votes; ties share the lowest rank ("min").
tt["rank"] = (
    tt.groupby("polling_id")["value"]
    .rank(method="min", ascending=False)
    .astype(int)
)

tt

Unnamed: 0,polling_id,candidate,value,rate,rank
0,10001- 1-Freshwater,Conservative,28,0.280000,2
1,10001- 1-Freshwater,Liberal,55,0.550000,1
2,10001- 1-Freshwater,NDP-New Democratic Party,13,0.130000,3
3,10001- 1-Freshwater,People's Party - PPC,4,0.040000,4
4,10001- 10-Carbonear,Conservative,35,0.406977,2
...,...,...,...,...,...
472683,62001- S/R 1-SVR Group 1/RÉS Groupe 1,Liberal,94,0.445498,1
472684,62001- S/R 1-SVR Group 1/RÉS Groupe 1,NDP-New Democratic Party,83,0.393365,2
472685,62001- S/R 2-SVR Group 2/RÉS Groupe 2,Conservative,66,0.168798,3
472686,62001- S/R 2-SVR Group 2/RÉS Groupe 2,Liberal,141,0.360614,2


In [35]:
# Sanity check: overall vote totals per "candidate" label across all polling stations.
tt.groupby("candidate")[["value"]].sum()

Unnamed: 0_level_0,value
candidate,Unnamed: 1_level_1
Animal Protection Party,2546
Bloc Québécois,1301615
CFF - Canada's Fourth Front,105
Centrist,648
Christian Heritage Party,8985
Communist,4700
Conservative,5747410
Free Party Canada,47252
Green Party,396988
Independent,25605


In [36]:
# Persist the per-station results as a gzip-compressed CSV without the row index.
results_path = f"../data_output/Canada/{year}_{election}.csv.gz"
tt.to_csv(results_path, compression="gzip", index=False)

In [45]:
# Lookup table: one row per distinct (polling_id, district) combination, plus
# state_id (first two characters of the district number) and its name from
# districts_2021. Codes missing from the mapping are left unchanged by replace().
df_location = (
    df[["polling_id", "district_id", "district", "district_fr"]]
    .drop_duplicates()
    .assign(
        state_id=lambda frame: frame["district_id"].astype(str).str[0:2],
        state=lambda frame: frame["state_id"].replace(districts_2021),
    )
)

location_path = f"../data_output/Canada/{year}_{election}_location.csv.gz"
df_location.to_csv(location_path, compression="gzip", index=False)
df_location.head()

Unnamed: 0,polling_id,district_id,district,district_fr,state_id,state
0,10001- 1-Freshwater,10001,Avalon,Avalon,10,Newfoundland and Labrador
4,10001- 2-Victoria,10001,Avalon,Avalon,10,Newfoundland and Labrador
8,10001- 3-Victoria,10001,Avalon,Avalon,10,Newfoundland and Labrador
12,10001- 4-Victoria,10001,Avalon,Avalon,10,Newfoundland and Labrador
16,10001- 5-Victoria,10001,Avalon,Avalon,10,Newfoundland and Labrador
