In [42]:
import geopandas as gpd
import pandas as pd

In [43]:
# Load the generated square-grid layer (square_grids1000 shapefile).
gdf_grids = gpd.read_file('Data/Generated_grids/square_grids1000.shp')

In [44]:
# Inspect which columns the grid layer carries.
gdf_grids.columns

Index(['grid_id', 'area', 'centroid', 'index_righ', 'CODEMAMROT', 'NOM',
       'TYPE', 'ABREV', 'int_area', 'geometry'],
      dtype='object')

In [45]:
# Load the LIMADMIN shapefile from the Boroughs data folder.
gdf_boroughs = gpd.read_file('Data/Boroughs/LIMADMIN.shp')

In [46]:
# Inspect which columns the borough layer carries.
gdf_boroughs.columns

Index(['MUNID', 'CODEID', 'CODEMAMROT', 'NOM', 'TYPE', 'ABREV', 'NUM', 'AIRE',
       'PERIM', 'geometry'],
      dtype='object')

In [47]:
# Preview up to the first 34 rows of the borough layer.
gdf_boroughs.head(34)

Unnamed: 0,MUNID,CODEID,CODEMAMROT,NOM,TYPE,ABREV,NUM,AIRE,PERIM,geometry
0,66023,11,REM05,Outremont,Arrondissement,OM,5,3813356.0,10836.67,"POLYGON ((-73.62078 45.52365, -73.62020 45.524..."
1,66023,22,REM17,LaSalle,Arrondissement,LS,18,25197270.0,25259.849,"POLYGON ((-73.66610 45.42180, -73.66601 45.428..."
2,66023,62,66072,Mont-Royal,Ville liée,MR,2,7445560.0,18314.038,"POLYGON ((-73.65075 45.52631, -73.65036 45.526..."
3,66023,9,REM19,Ville-Marie,Arrondissement,VM,20,21500630.0,26585.959,"POLYGON ((-73.53013 45.53476, -73.53019 45.534..."
4,66023,5,REM21,Le Plateau-Mont-Royal,Arrondissement,PM,22,8151665.0,13158.328,"POLYGON ((-73.55923 45.53990, -73.56017 45.538..."
5,66023,54,66062,Hampstead,Ville liée,HS,10,1768055.0,5875.848,"POLYGON ((-73.65601 45.47893, -73.65601 45.481..."
6,66023,63,REM20,Le Sud-Ouest,Arrondissement,SO,21,18144270.0,29633.161,"POLYGON ((-73.62908 45.44839, -73.63197 45.449..."
7,66023,57,REM33,Rivière-des-Prairies-Pointe-aux-Trembles,Arrondissement,RP,19,50047000.0,38573.067,"POLYGON ((-73.62475 45.63359, -73.62469 45.633..."
8,66023,28,REM27,Lachine,Arrondissement,LC,17,23127790.0,25399.526,"POLYGON ((-73.72299 45.42179, -73.72085 45.438..."
9,66023,51,66087,Dorval,Ville liée,DV,1,28156150.0,32357.566,"POLYGON ((-73.79470 45.48181, -73.78960 45.484..."


In [48]:
# Load the cleaned property-assessment table produced by an earlier step.
df_property_assessment = pd.read_csv(
    'Data/Processed_Datasets/Cleaned_datasets/df_property_assessment.csv')

In [49]:
# Seed the feature table with one row per distinct CODEMAMROT value found in
# the grid layer; every feature computed below is merged onto this key.
df_fe_stc = gdf_grids.loc[:, ['CODEMAMROT']].drop_duplicates()

In [50]:
# Check how many distinct CODEMAMROT rows the feature table starts with.
df_fe_stc.shape

(34, 1)

### Number of property assessments per borough or affiliated city

In [51]:
# Count the property-assessment rows that fall under each CODEMAMROT.
pa_counts = (
    df_property_assessment
    .groupby('CODEMAMROT')
    .size()
    .rename('PROPERTY_ASSESSMENT_COUNT')
    .reset_index()
)

In [52]:
# Left-join so every CODEMAMROT already in the feature table is kept, even if
# it has no property assessments.
df_fe_stc = pd.merge(left=df_fe_stc, right=pa_counts,
                     how='left', on='CODEMAMROT')

In [53]:
# Summary statistics for every column, including the non-numeric key.
df_fe_stc.describe(include='all')

Unnamed: 0,CODEMAMROT,PROPERTY_ASSESSMENT_COUNT
count,34,34.0
unique,34,
top,REM33,
freq,1,
mean,,14752.676471
std,,11692.637165
min,,74.0
25%,,6179.5
50%,,11948.5
75%,,25930.25


### Number of property assessments per borough or affiliated city for each UEF category

In [54]:
# Keep every assessment whose UEF category is not 'Infrastructure'.
not_infrastructure = df_property_assessment['CATEGORIE_UEF'].ne('Infrastructure')
df_pa = df_property_assessment[not_infrastructure]

In [55]:
# One row per CODEMAMROT, one column per UEF category, each cell holding the
# number of non-null ID_UEV values.
pa_counts = df_pa.pivot_table(values='ID_UEV',
                              index='CODEMAMROT',
                              columns='CATEGORIE_UEF',
                              aggfunc='count')
# Suffix each category name so the column is self-describing once joined.
pa_counts.columns = [
    f'{category}_PROPERTY_ASSESSMENT_COUNT' for category in pa_counts.columns
]
# Attach the per-category counts; a missing combination means zero assessments.
df_fe_stc = df_fe_stc.join(pa_counts, on='CODEMAMROT').fillna(0)

In [56]:
# Summary statistics after adding the per-category count columns.
df_fe_stc.describe(include='all')

Unnamed: 0,CODEMAMROT,PROPERTY_ASSESSMENT_COUNT,Condominium_PROPERTY_ASSESSMENT_COUNT,Régulier_PROPERTY_ASSESSMENT_COUNT
count,34,34.0,34.0,34.0
unique,34,,,
top,REM33,,,
freq,1,,,
mean,,14752.676471,5383.970588,9368.676471
std,,11692.637165,6963.531442,7029.16265
min,,74.0,0.0,74.0
25%,,6179.5,495.5,4345.5
50%,,11948.5,3150.5,7914.0
75%,,25930.25,8430.75,12712.25


### Average height above ground (number of above-ground floors, `ETAGE_HORS_SOL`) per borough or affiliated city

In [57]:
# Keep only the assessments that have an ETAGE_HORS_SOL value.
df_pa = df_property_assessment[df_property_assessment['ETAGE_HORS_SOL'].notna()]

In [58]:
# Mean ETAGE_HORS_SOL per CODEMAMROT, as a two-column frame ready to merge.
df_grouped_avg_height = (
    df_pa.groupby('CODEMAMROT')['ETAGE_HORS_SOL']
    .mean()
    .reset_index(name='HEIGHT_ABOVE_GROUND_MEAN')
)
print(df_grouped_avg_height)

   CODEMAMROT  HEIGHT_ABOVE_GROUND_MEAN
0       66007                  1.511873
1       66032                  1.906144
2       66047                  1.883526
3       66058                  1.515746
4       66062                  1.834877
5       66072                  1.755742
6       66087                  1.349052
7       66092                  1.052632
8       66097                  1.627905
9       66102                  1.868810
10      66107                  1.659620
11      66112                  1.564505
12      66117                  1.812587
13      66127                  1.553371
14      66142                  1.719910
15      REM05                  1.705458
16      REM09                  1.561592
17      REM12                  1.631122
18      REM14                  1.740404
19      REM15                  1.596520
20      REM16                  1.813226
21      REM17                  1.696316
22      REM19                  1.469965
23      REM20                  1.544283


In [59]:
# Left-join the mean height onto the feature table by CODEMAMROT.
df_fe_stc = pd.merge(left=df_fe_stc, right=df_grouped_avg_height,
                     how='left', on='CODEMAMROT')

In [60]:
# Zero-fill only the *_COUNT columns: a key with no matching rows genuinely
# has a count of 0, whereas a missing mean must stay NaN rather than become a
# misleading 0 that would distort later statistics.
df_fe_stc = df_fe_stc.fillna(
    {col: 0 for col in df_fe_stc.columns if str(col).endswith('_COUNT')})

In [61]:
# Summary statistics after adding HEIGHT_ABOVE_GROUND_MEAN.
df_fe_stc.describe(include='all')

Unnamed: 0,CODEMAMROT,PROPERTY_ASSESSMENT_COUNT,Condominium_PROPERTY_ASSESSMENT_COUNT,Régulier_PROPERTY_ASSESSMENT_COUNT,HEIGHT_ABOVE_GROUND_MEAN
count,34,34.0,34.0,34.0,34.0
unique,34,,,,
top,REM33,,,,
freq,1,,,,
mean,,14752.676471,5383.970588,9368.676471,1.669252
std,,11692.637165,6963.531442,7029.16265,0.178831
min,,74.0,0.0,74.0,1.052632
25%,,6179.5,495.5,4345.5,1.555426
50%,,11948.5,3150.5,7914.0,1.678461
75%,,25930.25,8430.75,12712.25,1.812223


### Average number of dwellings per property assessment, per borough or affiliated city

In [62]:
# Keep only the assessments that have a NOMBRE_LOGEMENT value.
df_pa = df_property_assessment[df_property_assessment['NOMBRE_LOGEMENT'].notna()]

In [63]:
# Mean NOMBRE_LOGEMENT per CODEMAMROT, as a two-column frame ready to merge.
df_grouped_avg_dwelling = (
    df_pa.groupby('CODEMAMROT')['NOMBRE_LOGEMENT']
    .mean()
    .reset_index(name='DWELLING_MEAN')
)
print(df_grouped_avg_dwelling)

   CODEMAMROT  DWELLING_MEAN
0       66007       2.120000
1       66032       1.937931
2       66047       1.313609
3       66058       2.105851
4       66062       1.468920
5       66072       1.479409
6       66087       1.549345
7       66092       1.000000
8       66097       1.434260
9       66102       1.061324
10      66107       1.061777
11      66112       1.057791
12      66117       1.582163
13      66127       1.106017
14      66142       1.251707
15      REM05       1.807292
16      REM09       2.069064
17      REM12       2.131014
18      REM14       2.766502
19      REM15       1.887186
20      REM16       3.037400
21      REM17       2.291821
22      REM19       2.296771
23      REM20       2.011925
24      REM21       2.861847
25      REM22       2.512385
26      REM23       2.392301
27      REM24       2.680339
28      REM25       2.928525
29      REM27       2.101962
30      REM31       1.365384
31      REM32       1.291819
32      REM33       1.526914
33      REM34 

In [64]:
# Left-join the mean dwelling count onto the feature table by CODEMAMROT.
df_fe_stc = pd.merge(left=df_fe_stc, right=df_grouped_avg_dwelling,
                     how='left', on='CODEMAMROT')

In [65]:
# Zero-fill only the *_COUNT columns: a key with no matching rows genuinely
# has a count of 0, whereas a missing mean must stay NaN rather than become a
# misleading 0 that would distort later statistics.
df_fe_stc = df_fe_stc.fillna(
    {col: 0 for col in df_fe_stc.columns if str(col).endswith('_COUNT')})

In [66]:
# Summary statistics after adding DWELLING_MEAN.
df_fe_stc.describe(include='all')

Unnamed: 0,CODEMAMROT,PROPERTY_ASSESSMENT_COUNT,Condominium_PROPERTY_ASSESSMENT_COUNT,Régulier_PROPERTY_ASSESSMENT_COUNT,HEIGHT_ABOVE_GROUND_MEAN,DWELLING_MEAN
count,34,34.0,34.0,34.0,34.0,34.0
unique,34,,,,,
top,REM33,,,,,
freq,1,,,,,
mean,,14752.676471,5383.970588,9368.676471,1.669252,1.909239
std,,11692.637165,6963.531442,7029.16265,0.178831,0.652216
min,,74.0,0.0,74.0,1.052632,1.0
25%,,6179.5,495.5,4345.5,1.555426,1.382603
50%,,11948.5,3150.5,7914.0,1.678461,1.912559
75%,,25930.25,8430.75,12712.25,1.812223,2.295534


### Average construction year (excluding years after 2023) per borough or affiliated city

In [67]:
# Keep assessments built in 2023 or earlier; rows with a missing year fail the
# comparison and are dropped as well.
built_by_2023 = df_property_assessment['ANNEE_CONSTRUCTION'].le(2023)
df_pa = df_property_assessment[built_by_2023]

In [68]:
# Mean ANNEE_CONSTRUCTION per CODEMAMROT, as a two-column frame ready to merge.
df_grouped_avg_construction_year = (
    df_pa.groupby('CODEMAMROT')['ANNEE_CONSTRUCTION']
    .mean()
    .reset_index(name='CONSTRUCTION_YEAR_MEAN')
)
print(df_grouped_avg_construction_year)

   CODEMAMROT  CONSTRUCTION_YEAR_MEAN
0       66007             1964.521849
1       66032             1932.048753
2       66047             1940.835907
3       66058             1975.123788
4       66062             1959.786725
5       66072             1962.710614
6       66087             1970.901922
7       66092             1940.210526
8       66097             1973.496250
9       66102             1983.986846
10      66107             1968.710984
11      66112             1967.930314
12      66117             1967.026174
13      66127             1952.226471
14      66142             1978.006600
15      REM05             1938.635473
16      REM09             1980.722426
17      REM12             1972.693531
18      REM14             1978.315957
19      REM15             1983.977006
20      REM16             1966.620623
21      REM17             1978.660199
22      REM19             1979.474658
23      REM20             1972.690572
24      REM21             1937.150205
25      REM2

In [69]:
# Left-join the mean construction year onto the feature table by CODEMAMROT.
df_fe_stc = pd.merge(left=df_fe_stc, right=df_grouped_avg_construction_year,
                     how='left', on='CODEMAMROT')

In [70]:
# Zero-fill only the *_COUNT columns: a key with no matching rows genuinely
# has a count of 0, whereas a missing mean (e.g. a construction year) must
# stay NaN rather than become a misleading 0.
df_fe_stc = df_fe_stc.fillna(
    {col: 0 for col in df_fe_stc.columns if str(col).endswith('_COUNT')})

In [71]:
# Summary statistics after adding CONSTRUCTION_YEAR_MEAN.
df_fe_stc.describe(include='all')

Unnamed: 0,CODEMAMROT,PROPERTY_ASSESSMENT_COUNT,Condominium_PROPERTY_ASSESSMENT_COUNT,Régulier_PROPERTY_ASSESSMENT_COUNT,HEIGHT_ABOVE_GROUND_MEAN,DWELLING_MEAN,CONSTRUCTION_YEAR_MEAN
count,34,34.0,34.0,34.0,34.0,34.0,34.0
unique,34,,,,,,
top,REM33,,,,,,
freq,1,,,,,,
mean,,14752.676471,5383.970588,9368.676471,1.669252,1.909239,1966.164498
std,,11692.637165,6963.531442,7029.16265,0.178831,0.652216,14.510131
min,,74.0,0.0,74.0,1.052632,1.0,1932.048753
25%,,6179.5,495.5,4345.5,1.555426,1.382603,1960.097888
50%,,11948.5,3150.5,7914.0,1.678461,1.912559,1968.309573
75%,,25930.25,8430.75,12712.25,1.812223,2.295534,1978.238618


### Average land area (m²) per borough or affiliated city

In [72]:
# Keep only the assessments that have a SUPERFICIE_TERRAIN value.
df_pa = df_property_assessment[df_property_assessment['SUPERFICIE_TERRAIN'].notna()]

In [73]:
# Mean SUPERFICIE_TERRAIN per CODEMAMROT, as a two-column frame ready to merge.
df_grouped_avg_land_area = (
    df_pa.groupby('CODEMAMROT')['SUPERFICIE_TERRAIN']
    .mean()
    .reset_index(name='LAND_AREA_MEAN')
)
print(df_grouped_avg_land_area)

   CODEMAMROT  LAND_AREA_MEAN
0       66007     8320.717742
1       66032      523.243367
2       66047      661.359606
3       66058      796.065181
4       66062      665.220986
5       66072      861.466750
6       66087     2532.455302
7       66092     2441.256757
8       66097     1317.279161
9       66102     1200.200483
10      66107     1209.883205
11      66112     3125.256887
12      66117     5728.167168
13      66127    13474.906615
14      66142      796.601633
15      REM05      474.270036
16      REM09      973.738217
17      REM12      402.040483
18      REM14      758.332242
19      REM15     1373.713573
20      REM16      560.342243
21      REM17      704.992400
22      REM19      319.157880
23      REM20      487.410665
24      REM21      218.302654
25      REM22      607.894980
26      REM23      602.361625
27      REM24      354.549482
28      REM25      464.919686
29      REM27     1214.685191
30      REM31     1083.008362
31      REM32     3437.354648
32      RE

In [74]:
# Left-join the mean land area onto the feature table by CODEMAMROT.
df_fe_stc = pd.merge(left=df_fe_stc, right=df_grouped_avg_land_area,
                     how='left', on='CODEMAMROT')

In [75]:
# Zero-fill only the *_COUNT columns: a key with no matching rows genuinely
# has a count of 0, whereas a missing mean must stay NaN rather than become a
# misleading 0 that would distort later statistics.
df_fe_stc = df_fe_stc.fillna(
    {col: 0 for col in df_fe_stc.columns if str(col).endswith('_COUNT')})

In [76]:
# Summary statistics after adding LAND_AREA_MEAN.
df_fe_stc.describe(include='all')

Unnamed: 0,CODEMAMROT,PROPERTY_ASSESSMENT_COUNT,Condominium_PROPERTY_ASSESSMENT_COUNT,Régulier_PROPERTY_ASSESSMENT_COUNT,HEIGHT_ABOVE_GROUND_MEAN,DWELLING_MEAN,CONSTRUCTION_YEAR_MEAN,LAND_AREA_MEAN
count,34,34.0,34.0,34.0,34.0,34.0,34.0,34.0
unique,34,,,,,,,
top,REM33,,,,,,,
freq,1,,,,,,,
mean,,14752.676471,5383.970588,9368.676471,1.669252,1.909239,1966.164498,1744.473383
std,,11692.637165,6963.531442,7029.16265,0.178831,0.652216,14.510131,2651.686684
min,,74.0,0.0,74.0,1.052632,1.0,1932.048753,218.302654
25%,,6179.5,495.5,4345.5,1.555426,1.382603,1960.097888,568.10927
50%,,11948.5,3150.5,7914.0,1.678461,1.912559,1968.309573,796.333407
75%,,25930.25,8430.75,12712.25,1.812223,2.295534,1978.238618,1291.630669


### Average building area (m²) per borough or affiliated city

In [77]:
# Keep only the assessments that have a SUPERFICIE_BATIMENT value.
df_pa = df_property_assessment[df_property_assessment['SUPERFICIE_BATIMENT'].notna()]

In [78]:
# Mean SUPERFICIE_BATIMENT per CODEMAMROT, as a two-column frame ready to merge.
df_grouped_avg_building_area = (
    df_pa.groupby('CODEMAMROT')['SUPERFICIE_BATIMENT']
    .mean()
    .reset_index(name='BUILDING_AREA_MEAN')
)
print(df_grouped_avg_building_area)

   CODEMAMROT  BUILDING_AREA_MEAN
0       66007          602.407966
1       66032         1395.991012
2       66047          218.814766
3       66058         1473.793512
4       66062          300.139693
5       66072          654.273231
6       66087          454.959005
7       66092          100.568966
8       66097          310.166871
9       66102          284.105708
10      66107          228.496283
11      66112          364.019383
12      66117          557.784615
13      66127          315.765043
14      66142          267.512797
15      REM05          809.643747
16      REM09         1056.200716
17      REM12         1249.160882
18      REM14          466.943374
19      REM15          847.813642
20      REM16          331.110782
21      REM17          681.754672
22      REM19         1720.661188
23      REM20         1181.057356
24      REM21         1362.174027
25      REM22          521.481488
26      REM23         2226.556192
27      REM24          738.912896
28      REM25 

In [79]:
# Left-join the mean building area onto the feature table by CODEMAMROT.
df_fe_stc = pd.merge(left=df_fe_stc, right=df_grouped_avg_building_area,
                     how='left', on='CODEMAMROT')

In [80]:
# Zero-fill only the *_COUNT columns: a key with no matching rows genuinely
# has a count of 0, whereas a missing mean must stay NaN rather than become a
# misleading 0 that would distort later statistics.
df_fe_stc = df_fe_stc.fillna(
    {col: 0 for col in df_fe_stc.columns if str(col).endswith('_COUNT')})

In [81]:
# Summary statistics after adding BUILDING_AREA_MEAN.
df_fe_stc.describe(include='all')

Unnamed: 0,CODEMAMROT,PROPERTY_ASSESSMENT_COUNT,Condominium_PROPERTY_ASSESSMENT_COUNT,Régulier_PROPERTY_ASSESSMENT_COUNT,HEIGHT_ABOVE_GROUND_MEAN,DWELLING_MEAN,CONSTRUCTION_YEAR_MEAN,LAND_AREA_MEAN,BUILDING_AREA_MEAN
count,34,34.0,34.0,34.0,34.0,34.0,34.0,34.0,34.0
unique,34,,,,,,,,
top,REM33,,,,,,,,
freq,1,,,,,,,,
mean,,14752.676471,5383.970588,9368.676471,1.669252,1.909239,1966.164498,1744.473383,695.843108
std,,11692.637165,6963.531442,7029.16265,0.178831,0.652216,14.510131,2651.686684,501.917888
min,,74.0,0.0,74.0,1.052632,1.0,1932.048753,218.302654,100.568966
25%,,6179.5,495.5,4345.5,1.555426,1.382603,1960.097888,568.10927,311.566414
50%,,11948.5,3150.5,7914.0,1.678461,1.912559,1968.309573,796.333407,539.633052
75%,,25930.25,8430.75,12712.25,1.812223,2.295534,1978.238618,1291.630669,953.587339


# Output file

In [82]:
# Persist the combined feature table to CSV.
# NOTE(review): `index` is left at its default, so the DataFrame index is
# written as an unnamed first column — confirm downstream readers expect it.
df_fe_stc.to_csv(
    'Data/Processed_Datasets/Combined_tables/df_fe_property_assessments.csv')