In [55]:
import geopandas as gpd
import pandas as pd

In [56]:
df_population = pd.read_csv(
    'Data/Population/census_proportionate_v5.csv', encoding='ISO-8859-1')

In [57]:
df_population.columns

Index(['CODEMAMROT', 'NOM', 'YEAR', 'POPULATION'], dtype='object')

In [58]:
gdf_boroughs = gpd.read_file('Data/Boroughs/LIMADMIN.shp')


In [59]:
gdf_boroughs.columns

Index(['MUNID', 'CODEID', 'CODEMAMROT', 'NOM', 'TYPE', 'ABREV', 'NUM', 'AIRE',
       'PERIM', 'geometry'],
      dtype='object')

### Population total per borough or affiliated city and year

In [60]:
df_population.head()

Unnamed: 0,CODEMAMROT,NOM,YEAR,POPULATION
0,REM23,Ahuntsic-Cartierville,2011,126891.0
1,REM09,Anjou,2011,41928.0
2,REM34,CÃ´te-des-Neiges-Notre-Dame-de-GrÃ¢ce,2011,165031.0
3,REM27,Lachine,2011,41616.0
4,REM17,LaSalle,2011,74276.0


In [61]:
df_population = df_population.rename(
    columns={'CODEMARMOT': 'CODEMAMROT'})

In [62]:
df_grouped_population = df_population.groupby(['NOM', 'CODEMAMROT', 'YEAR'])[
    'POPULATION'].sum().rename('POPULATION_SUM').reset_index()
print(df_grouped_population)

                       NOM CODEMAMROT  YEAR  POPULATION_SUM
0    Ahuntsic-Cartierville      REM23  2011        126891.0
1    Ahuntsic-Cartierville      REM23  2012        128361.8
2    Ahuntsic-Cartierville      REM23  2013        129832.6
3    Ahuntsic-Cartierville      REM23  2014        131303.4
4    Ahuntsic-Cartierville      REM23  2015        132774.2
..                     ...        ...   ...             ...
437              Westmount      66032  2019         19919.6
438              Westmount      66032  2020         19788.8
439              Westmount      66032  2021         19658.0
440              Westmount      66032  2022         19527.2
441              Westmount      66032  2023         19396.4

[442 rows x 4 columns]


In [63]:
df_grouped_population = df_grouped_population.rename(
    columns={'borough': 'NOM', 'year': 'YEAR'})
df_grouped_population.columns

Index(['NOM', 'CODEMAMROT', 'YEAR', 'POPULATION_SUM'], dtype='object')

### Population density per borough or affiliated city and year

In [64]:
df_grouped_population = df_grouped_population.merge(gdf_boroughs,
                                                    on='CODEMAMROT',
                                                    how='left')

In [65]:
df_grouped_population.head()

Unnamed: 0,NOM_x,CODEMAMROT,YEAR,POPULATION_SUM,MUNID,CODEID,NOM_y,TYPE,ABREV,NUM,AIRE,PERIM,geometry
0,Ahuntsic-Cartierville,REM23,2011,126891.0,66023,4,Ahuntsic-Cartierville,Arrondissement,AC,24,25571190.0,37790.097,"POLYGON ((-73.76363 45.51193, -73.76354 45.511..."
1,Ahuntsic-Cartierville,REM23,2012,128361.8,66023,4,Ahuntsic-Cartierville,Arrondissement,AC,24,25571190.0,37790.097,"POLYGON ((-73.76363 45.51193, -73.76354 45.511..."
2,Ahuntsic-Cartierville,REM23,2013,129832.6,66023,4,Ahuntsic-Cartierville,Arrondissement,AC,24,25571190.0,37790.097,"POLYGON ((-73.76363 45.51193, -73.76354 45.511..."
3,Ahuntsic-Cartierville,REM23,2014,131303.4,66023,4,Ahuntsic-Cartierville,Arrondissement,AC,24,25571190.0,37790.097,"POLYGON ((-73.76363 45.51193, -73.76354 45.511..."
4,Ahuntsic-Cartierville,REM23,2015,132774.2,66023,4,Ahuntsic-Cartierville,Arrondissement,AC,24,25571190.0,37790.097,"POLYGON ((-73.76363 45.51193, -73.76354 45.511..."


In [66]:
df_grouped_population.shape

(442, 13)

In [67]:
df_grouped_population['POPULATION_DENSITY'] = df_grouped_population['POPULATION_SUM'] / \
    df_grouped_population['AIRE']

In [68]:
df_grouped_population.columns

Index(['NOM_x', 'CODEMAMROT', 'YEAR', 'POPULATION_SUM', 'MUNID', 'CODEID',
       'NOM_y', 'TYPE', 'ABREV', 'NUM', 'AIRE', 'PERIM', 'geometry',
       'POPULATION_DENSITY'],
      dtype='object')

In [69]:
df_grouped_population = df_grouped_population[['CODEMAMROT',
                                               'YEAR',
                                               'POPULATION_SUM',
                                               'AIRE',
                                               'POPULATION_DENSITY']]

In [70]:
df_grouped_population.describe(include='all')

Unnamed: 0,CODEMAMROT,YEAR,POPULATION_SUM,AIRE,POPULATION_DENSITY
count,442,442.0,442.0,442.0,442.0
unique,34,,,,
top,REM23,,,,
freq,13,,,,
mean,,2017.0,57530.137104,18177970.0,0.003633
std,,3.745897,48707.383334,11941090.0,0.002921
min,,2011.0,5.0,180508.9,2.8e-05
25%,,2014.0,19082.55,8151665.0,0.000926
50%,,2017.0,43198.3,16181940.0,0.003122
75%,,2020.0,91785.05,25197270.0,0.005062


In [71]:
df_grouped_population['YEAR'] = df_grouped_population['YEAR'].astype('object')

In [72]:
df_grouped_population.dtypes

CODEMAMROT             object
YEAR                   object
POPULATION_SUM        float64
AIRE                  float64
POPULATION_DENSITY    float64
dtype: object

# Output file

In [74]:
df_grouped_population.to_csv(
    'Data/Processed_Datasets/Combined_tables/df_fe_population.csv')