In [1]:
# UT-TOR-DATA-PT-01-2020-U-C Team project #1
# (c) Boris Smirnov

## The problem


The `table_tableau08.csv` file contains data on how many votes were cast for each participating political party in each province/territory. The problem is the row and column labels: they are written for a human reader (long bilingual names), not for a computer to process.


## The goal


Using dictionaries **all_parties_2015.csv** and **provinces_ids.csv** transform **table_tableau08.csv**:

* rename the columns to province names in English, so they can be addressed by those names in the geodata branch of this project
* rename all political parties in the first column to their abbreviations (party Ids), so that they can be used as keys

I need this data to create pie charts for every province, that show vote distribution (top 4 parties and the rest). The plan is to add those pie charts into the info boxes on Google Maps. This task will be done in geodata branch of the project.


## Source files


* **table_tableau08.csv** - Table 8: Number of valid votes by political affiliation from [Elections Canada](https://www.elections.ca/content.aspx?section=res&dir=rep/off/42gedata&document=summary&lang=e)
* **all_parties_2015.csv** - existing dictionary of the names of all parties that participated in the 2015 elections (generated by *make_party_list.ipynb*)
* **provinces_ids.csv** - existing dictionary of all province and territory names (generated by *make_party_list.ipynb*)

## Output file


* **votes_by_province_2015.csv** - a table with number of votes cast for each participating political party in each province

In [2]:
# Dependencies and initialization
import pandas as pd

# Input files (bare file names, so they are resolved relative to the working directory):
votes_party_fname = 'table_tableau08.csv'  # Elections Canada Table 8: valid votes by political affiliation
parties_fname = 'all_parties_2015.csv'  # dictionary of party names and Ids
provinces_fname = 'provinces_ids.csv'  # dictionary of province/territory names and abbreviations

# Output files:
votes_prov_fname = 'votes_by_province_2015.csv'  # votes per party per province, with machine-friendly labels

In [3]:
# Original data from Elections Canada.
# Every province/territory column header is bilingual and has the form
# "<English abbr.> Valid Votes/Votes valides <French abbr.>"; the party column
# holds "<English name>/<French name>".
votes_party_df = pd.read_csv(votes_party_fname)
votes_party_df

Unnamed: 0,Political affiliation/Appartenance politique,N.L. Valid Votes/Votes valides T.-N.-L.,P.E.I. Valid Votes/Votes valides Î.-P.-É.,N.S. Valid Votes/Votes valides N.-É.,N.B. Valid Votes/Votes valides N.-B.,Que. Valid Votes/Votes valides Qc,Ont. Valid Votes/Votes valides Ont.,Man. Valid Votes/Votes valides Man.,Sask. Valid Votes/Votes valides Sask.,Alta. Valid Votes/Votes valides Alb.,B.C. Valid Votes/Votes valides C.-B.,Y.T. Valid Votes/Votes valides Yn,N.W.T. Valid Votes/Votes valides T.N.-O.,Nun. Valid Votes/Votes valides Nt
0,Alliance of the North/Alliance du Nord,0,0,0,0,136,0,0,0,0,0,0,0,0
1,Animal Alliance Environment Voters Party of Ca...,0,0,0,0,0,1499,0,0,0,200,0,0,0
2,Bloc Québécois/Bloc Québécois,0,0,0,0,821144,0,0,0,0,0,0,0,0
3,Canada Party/Parti Canada,0,0,0,0,0,0,0,271,0,0,0,0,0
4,Canadian Action Party/Parti action canadienne,0,0,0,0,0,401,0,0,0,0,0,0,0
5,Christian Heritage Party of Canada/Parti de l'...,0,295,0,0,679,7791,2021,0,2786,1660,0,0,0
6,Communist Party of Canada/Parti communiste du ...,140,0,151,0,545,1571,135,0,486,1365,0,0,0
7,Conservative Party of Canada/Parti conservateu...,26469,16900,93697,112070,709164,2293393,224527,267937,1150101,708010,4928,3481,2956
8,Democratic Advancement Party of Canada/Parti p...,0,0,0,0,0,0,0,0,1187,0,0,0,0
9,Forces et Démocratie/Forces et Démocratie,84,0,0,0,8059,131,0,0,0,0,0,0,0


In [4]:
# Basically, what I want is to rename everything in the first column and all the column names

# 1. Renaming the columns.

# For this we need the provinces dictionary: it maps each province's English and
# French abbreviations to its full names
provinces_df = pd.read_csv(provinces_fname)
provinces_df

Unnamed: 0,Province Id,Province Name (en),Province Name (fr),Abbreviation (en),Abbreviation (fr),Alpha code,Region name
0,10,Newfoundland and Labrador,Terre-Neuve-et-Labrador,N.L.,T.-N.-L.,NL,Atlantic
1,11,Prince Edward Island,Île-du-Prince-Édouard,P.E.I.,Î.-P.-É.,PE,Atlantic
2,12,Nova Scotia,Nouvelle-Écosse,N.S.,N.-É.,NS,Atlantic
3,13,New Brunswick,Nouveau-Brunswick,N.B.,N.-B.,NB,Atlantic
4,24,Quebec,Québec,Que.,Qc,QC,Quebec
5,35,Ontario,,Ont.,Ont.,ON,Ontario
6,46,Manitoba,,Man.,Man.,MB,Prairies
7,47,Saskatchewan,,Sask.,Sask.,SK,Prairies
8,48,Alberta,,Alta.,Alb.,AB,Prairies
9,59,British Columbia,Colombie-Britannique,B.C.,C.-B.,BC,British Columbia


In [5]:
# Every province column is named "<English abbr.> Valid Votes/Votes valides <French abbr.>".
# We take the abbreviation at the end of each column name and look it up in the provinces
# dictionary to find the corresponding English province name.
rename_dct = {
    'Political affiliation/Appartenance politique': 'Party Id'
}

# Here I use the French abbreviation, because...
# Fun fact: Elections Canada doesn't always follow standard abbreviations.
#    Their English abbreviation for Nunavut is 'Nun', while the standard one should be 'Nvt'
#    French abbreviations are ok, so I'll use them
if not provinces_df['Abbreviation (fr)'].is_unique:
    # An ambiguous abbreviation would make the column -> province mapping arbitrary
    raise ValueError(f"Duplicate values in 'Abbreviation (fr)' of {provinces_fname}")

# Build the French abbreviation -> English name lookup once instead of
# filtering provinces_df again for every column
prov_name_by_abbr_fr = dict(
    zip(provinces_df['Abbreviation (fr)'], provinces_df['Province Name (en)'])
)

# Column names this cell produces. Skipping them makes re-running the cell a
# harmless no-op instead of a failed lookup on an already renamed column.
renamed_cols = set(prov_name_by_abbr_fr.values())

# The first column holds the party names, everything after it is a province/territory
for c in votes_party_df.columns[1:]:
    if c in renamed_cols:
        continue
    # The French abbreviation is the last space-separated token of the column name
    abbr_fr = c.rsplit(' ', 1)[-1]
    if abbr_fr not in prov_name_by_abbr_fr:
        raise ValueError(
            f"Column {c!r}: abbreviation {abbr_fr!r} not found in "
            f"'Abbreviation (fr)' of {provinces_fname}"
        )
    # Add old/new column name to the rename dictionary
    rename_dct[c] = prov_name_by_abbr_fr[abbr_fr]

# Keys of rename_dct that are not present among the columns are ignored by rename()
votes_party_df = votes_party_df.rename(columns=rename_dct)

In [6]:
# 2. Renaming parties to party Ids.

# For this we need the parties dictionary: it maps each party's long English and
# French names to a short Id
parties_df = pd.read_csv(parties_fname)
parties_df

Unnamed: 0,Id,Long Name (en),Long Name (fr),Short Name (fr),Short Name (en),Candidate Suffix,Custom Name
0,ATN,Alliance of the North,Alliance du Nord,ADN,ATN,ATN/ADN,
1,AA,Animal Alliance Environment Voters Party of Ca...,Animal Alliance Environment Voters Party of Ca...,Animal Alliance/Environment Voters,Animal Alliance/Environment Voters,Animal Alliance/Environment Voters/Animal All...,
2,BQ,Bloc Québécois,Bloc Québécois,Bloc Québécois,Bloc Québécois,Bloc Québécois/Bloc Québécois,Bloc Québécois
3,CAN,Canada Party,Parti Canada,Parti Canada,Canada Party,Canada Party/Parti Canada,
4,CAP,Canadian Action Party,Parti action canadienne,PAC,CAP,CAP/PAC,
5,CHP,Christian Heritage Party of Canada,Parti de l'Héritage Chrétien du Canada,Parti de l'Héritage Chrétien,Christian Heritage Party,Christian Heritage Party/Parti de l'Héritage ...,
6,COM,Communist Party of Canada,Parti communiste du Canada,Communiste,Communist,Communist/Communiste,
7,CPC,Conservative Party of Canada,Parti conservateur du Canada,Conservateur,Conservative,Conservative/Conservateur,Conservative
8,DAP,Democratic Advancement Party of Canada,Parti pour l'Avancement de la Démocratie au Ca...,Avancement de la Démocratie,Democratic Advancement,Democratic Advancement/Avancement de la Démoc...,
9,FD,Forces et Démocratie,Forces et Démocratie,Forces et Démocratie - Allier les forces de no...,Forces et Démocratie - Allier les forces de no...,Forces et Démocratie - Allier les forces de n...,


In [7]:
# Renaming parties is easy, because basically the source file was used for creating this
# dictionary: every party label is "<Long Name (en)>/<Long Name (fr)>".
#
# The Id is looked up by that full name (a key lookup) rather than by comparing the two
# tables row by row, so the result does not depend on both tables having exactly the
# same rows in the same order.
party_full_names = parties_df['Long Name (en)'] + '/' + parties_df['Long Name (fr)']
party_id_by_name = dict(zip(party_full_names, parties_df['Id']))
known_party_ids = set(parties_df['Id'])

# Values that already are Ids are kept as they are, so re-running this cell
# does not wipe out the column
party_ids = votes_party_df['Party Id'].map(
    lambda name: name if name in known_party_ids else party_id_by_name.get(name)
)

# 'Party Id' is meant to be used as a key: fail loudly, before the output file is
# written, if some party could not be resolved
unmatched = votes_party_df.loc[party_ids.isna(), 'Party Id'].tolist()
if unmatched:
    raise ValueError(f"No party Id found in {parties_fname} for: {unmatched}")

votes_party_df['Party Id'] = party_ids

# The result
votes_party_df.to_csv(votes_prov_fname, index=False, encoding='utf-8')
votes_party_df

Unnamed: 0,Party Id,Newfoundland and Labrador,Prince Edward Island,Nova Scotia,New Brunswick,Quebec,Ontario,Manitoba,Saskatchewan,Alberta,British Columbia,Yukon,Northwest Territories,Nunavut
0,ATN,0,0,0,0,136,0,0,0,0,0,0,0,0
1,AA,0,0,0,0,0,1499,0,0,0,200,0,0,0
2,BQ,0,0,0,0,821144,0,0,0,0,0,0,0,0
3,CAN,0,0,0,0,0,0,0,271,0,0,0,0,0
4,CAP,0,0,0,0,0,401,0,0,0,0,0,0,0
5,CHP,0,295,0,0,679,7791,2021,0,2786,1660,0,0,0
6,COM,140,0,151,0,545,1571,135,0,486,1365,0,0,0
7,CPC,26469,16900,93697,112070,709164,2293393,224527,267937,1150101,708010,4928,3481,2956
8,DAP,0,0,0,0,0,0,0,0,1187,0,0,0,0
9,FD,84,0,0,0,8059,131,0,0,0,0,0,0,0
