In [1]:
# UT-TOR-DATA-PT-01-2020-U-C Week 11
# Web Design Challenge
# (c) Boris Smirnov

# This notebook creates a translation table from FED names to FED ids

# In 2003 a new representation order (RO) was defined, with new electoral district boundaries and names.
# However, those districts were only assigned numerical codes in 2008. Before that, in 2006 and 2004,
# they were identified only by names.
# On top of that, 36 FEDs were renamed sometime between the 2004 and 2006 elections.
# Fortunately, Elections Canada supplied a table with the renaming rules (table14.csv).
# However, in order to use it, one has to fix it in the following ways:
# - convert to utf-8
# - replace '—' with '--'
# - replace ' ,1/9/2004' with ',1/9/2004' (remove extra space before field separator)
# - remove extra space before field separator in 'Churchill River/Rivière Churchill ,'
# After that, the file can be used to map new names (from 2008 data) to the old ones

In [7]:
import pandas as pd

In [8]:
# --- Input files (relative paths) ---
# Corrected copy of the renaming-rules table (table14.csv with the fixes
# listed in the header notes applied).
ren_rules_fname = '2004/table14_fixed!.csv'
# 2008 table that supplies both the FED names and their numeric ids.
feds2008_fname = '2008/table_tableau11.csv'
# 2004 counterpart of the table above.
# NOTE(review): not referenced by any cell visible in this notebook — confirm
# whether it is still needed.
feds2004_fname = '2004/table_tableau11.csv'

# --- Output file ---
# Destination of the generated name -> id translation table.
dst_fname = 'ro2003.csv'

In [9]:
# Read the renaming rules and keep only the columns at positions 1 and 2,
# which are relabelled 'Old' and 'New' below.
ren_rules_df = (
    pd.read_csv(ren_rules_fname)
    .iloc[:, 1:3]
    .copy()
)
ren_rules_df.columns = ['Old', 'New']

# Lookup keyed by the 'New' name and yielding the 'Old' name, so that names
# taken from the 2008 data can be translated back.
rename_dict = {
    new_name: old_name
    for new_name, old_name in zip(ren_rules_df['New'], ren_rules_df['Old'])
}

In [10]:
# Read the 2008 table and keep only the columns at positions 1 and 2,
# relabelled as the 2008 FED name and the FED id.
feds2008_df = (
    pd.read_csv(feds2008_fname)
    .iloc[:, 1:3]
    .copy()
)
feds2008_df.columns = ['FED Name 2008', 'FED Id']

# Derive the 2004 name: use the renaming table when the 2008 name appears in
# it, otherwise carry the name over unchanged.
feds2008_df['FED Name 2004'] = feds2008_df['FED Name 2008'].map(
    lambda fed_name: rename_dict.get(fed_name, fed_name))

In [11]:
# Final column order: numeric id first, then the 2004 name, then the 2008 name.
ro2003_df = feds2008_df.loc[:, ['FED Id', 'FED Name 2004', 'FED Name 2008']]
# Bare expression so the notebook renders the resulting table.
ro2003_df

Unnamed: 0,FED Id,FED Name 2004,FED Name 2008
0,10001,Avalon,Avalon
1,10002,Bonavista--Exploits,Bonavista--Gander--Grand Falls--Windsor
2,10003,Humber--St. Barbe--Baie Verte,Humber--St. Barbe--Baie Verte
3,10004,Labrador,Labrador
4,10005,Random--Burin--St. George's,Random--Burin--St. George's
...,...,...,...
303,59035,Victoria,Victoria
304,59036,West Vancouver--Sunshine Coast,West Vancouver--Sunshine Coast--Sea to Sky Cou...
305,60001,Yukon,Yukon
306,61001,Western Arctic,Western Arctic


In [12]:
# Persist the translation table; index=False leaves the row index out of the file.
ro2003_df.to_csv(path_or_buf=dst_fname, index=False)