In [9]:
# This script assembles all the preprocessed provincial data files
# into a single file for the whole country.

import pandas as pd
import time

# Province/territory codes. Each code must have a matching
# ../processedData/processed_<code>_<year>.csv file for every year below.
locationList = ['PEI', 'NS', 'NB', 'NL', 'QC', 'ON', 'MB', 'SK', 'AB', 'BC', 'YK', 'NT', 'NU']
# Smaller subset, handy for a quick trial run:
# locationList = ['PEI', 'NS', 'NB']
# Years to assemble; one country-wide file is written per year.
years = [2016, 2021]
# Encoding passed to pd.read_csv for the per-province input files.
encoding = 'ISO-8859-1'


# Read in the data for each province, one output file per year
start = time.time()
for year in years:
    provinceDFs = []
    for location in locationList:
        print(f"Reading in data for {location} {year}...")
        filePath = f'../processedData/processed_{location}_{year}.csv'
        df = pd.read_csv(filePath, encoding=encoding, low_memory=False)

        # Tag every row with its province so that community names which
        # occur in more than one province can still be told apart.
        df['Province'] = location
        provinceDFs.append(df)

    # Concatenate the dataframes
    print(f'Concatenating dataframes for {year}...')
    # NOTE: ignore_index=False keeps each file's own row labels, so labels
    # repeat across provinces. They are written out below (index=True) as the
    # first, unnamed CSV column; this is left unchanged so the output layout
    # stays the same for anything that already reads these files.
    allData = pd.concat(provinceDFs, ignore_index=False)

    # Sort by 'GEO_NAME'. A stable sort is requested explicitly: the default
    # quicksort may order rows that share a GEO_NAME differently between runs,
    # whereas mergesort keeps such rows in locationList order.
    allData = allData.sort_values(by='GEO_NAME', kind='mergesort')

    # Write the data to a file
    allData.to_csv(f'../processedData/processed_Canada_{year}.csv', index=True)


end = time.time()
print(f'Data assembly took {end - start} seconds.')

Reading in data for PEI 2016...
Added province column.
Reading in data for NS 2016...
Added province column.
Reading in data for NB 2016...
Added province column.
Reading in data for NL 2016...
Added province column.
Reading in data for QC 2016...
Added province column.
Reading in data for ON 2016...
Added province column.
Reading in data for MB 2016...
Added province column.
Reading in data for SK 2016...
Added province column.
Reading in data for AB 2016...
Added province column.
Reading in data for BC 2016...
Added province column.
Reading in data for YK 2016...
Added province column.
Reading in data for NT 2016...
Added province column.
Reading in data for NU 2016...
Added province column.
Concatenating dataframes for 2016...
Reading in data for PEI 2021...
Added province column.
Reading in data for NS 2021...
Added province column.
Reading in data for NB 2021...
Added province column.
Reading in data for NL 2021...
Added province column.
Reading in data for QC 2021...
Added provin