In [310]:
# 2019 PIPELINE
# cols must match this format 

""" ['AREA_NAME', 'CLASSIFICATION', 'CLASSIFICATION_CODE', 'geometry_wkt',
       'geometry_type', 'Area', 'Bachelor Leased', 'bachelor_avg_lease_rate',
       '1_bedrooms_leased', '1_bed_room_avg_lease_rate', '2_bedrooms_leased',
       '2_bedrooms_avg_lease_rate', '3_bedrooms_leased',
       '3_bedrooms_avg_lease_rate', 'area_sq_meters', 'perimeter_meters',
       'park_count', 'ASSAULT_2024', 'ASSAULT_RATE_2024', 'AUTOTHEFT_2024',
       'AUTOTHEFT_RATE_2024', 'BIKETHEFT_RATE_2024', 'BREAKENTER_2024',
       'BIKETHEFT_RATE_2024_1', 'HOMICIDE_2024', 'HOMICIDE_RATE_2024',
       'ROBBERY_2024', 'ROBBERY_RATE_2024', 'SHOOTING_2024',
       'SHOOTING_RATE_2024', 'THEFTFROMMV_2024', 'THEFTFROMMV_RATE_2024',
       'THEFTOVER_2024', 'THEFTOVER_RATE_2024', 'POPULATION_2024',
       'total_stop_count', 'avg_stop_frequency', 'max_stop_frequency',
       'total_line_length_meters', 'transit_line_density',
       'distinct_route_count'] """

"""
Static cols: 

AREA_NAME'
'CLASSIFICATION'
'CLASSIFICATION_CODE'
'geometry_wkt'
'geometry_type'
'Area'
'area_sq_meters'
'perimeter_meters'
'park_count'
'total_stop_count'
'avg_stop_frequency'
'max_stop_frequency'
'total_line_length_meters'
'transit_line_density'
'distinct_route_count'


2019 specific cols in og dataset

'ASSAULT_2019', 
'ASSAULT_RATE_2019',
 'AUTOTHEFT_2019', '
 AUTOTHEFT_RATE_2019', 
 'BIKETHEFT_RATE_2019', 
 'BREAKENTER_2019', 
 'BIKETHEFT_RATE_2019_1', 
 'HOMICIDE_2019', 
 'HOMICIDE_RATE_2019', 
 'ROBBERY_2019', 
 'ROBBERY_RATE_2019', 
 'SHOOTING_2019',
 'SHOOTING_RATE_2019', 
 'THEFTFROMMV_2019', 
 'THEFTFROMMV_RATE_2019', 
 'THEFTOVER_2019', 
 'THEFTOVER_RATE_2019', 

 'POPULATION_2019' (not in og dataset )


"""

# read from tor_neighborgood_condorental.csv, take all static rows, and year coded rows for 2019. 
# create population row, fill with 0s if not found in og dataset

#  'Bachelor Leased', 'bachelor_avg_lease_rate', 1_bedrooms_leased', '1_bed_room_avg_lease_rate', '2_bedrooms_leased', '2_bedrooms_avg_lease_rate', '3_bedrooms_leased','3_bedrooms_avg_lease_rate should be created with null values, I will manually join these after 

"\nStatic cols: \n\nAREA_NAME'\n'CLASSIFICATION'\n'CLASSIFICATION_CODE'\n'geometry_wkt'\n'geometry_type'\n'Area'\n'area_sq_meters'\n'perimeter_meters'\n'park_count'\n'total_stop_count'\n'avg_stop_frequency'\n'max_stop_frequency'\n'total_line_length_meters'\n'transit_line_density'\n'distinct_route_count'\n\n\n2019 specific cols in og dataset\n\n'ASSAULT_2019', \n'ASSAULT_RATE_2019',\n 'AUTOTHEFT_2019', '\n AUTOTHEFT_RATE_2019', \n 'BIKETHEFT_RATE_2019', \n 'BREAKENTER_2019', \n 'BIKETHEFT_RATE_2019_1', \n 'HOMICIDE_2019', \n 'HOMICIDE_RATE_2019', \n 'ROBBERY_2019', \n 'ROBBERY_RATE_2019', \n 'SHOOTING_2019',\n 'SHOOTING_RATE_2019', \n 'THEFTFROMMV_2019', \n 'THEFTFROMMV_RATE_2019', \n 'THEFTOVER_2019', \n 'THEFTOVER_RATE_2019', \n\n 'POPULATION_2019' (not in og dataset )\n\n\n"

In [311]:
import pandas as pd
import numpy as np 

df_og = pd.read_csv('../sets/archive/tor_neighborhood_condorental.csv')
df_2024 = pd.read_csv('../sets/2024_dataframe.csv')



In [312]:
 # Build df_2019 in the required schema/order from df_og (force manual-join cols to 0)

import numpy as np
import pandas as pd

# Defensive cleanup: some CSVs have stray whitespace in headers
df_og.columns = df_og.columns.astype(str).str.strip()

REQUIRED_COLS_2019 = [
    "AREA_NAME",
    "CLASSIFICATION",
    "CLASSIFICATION_CODE",
    "geometry_wkt",
    "geometry_type",
    "Area",
    "Bachelor Leased",
    "bachelor_avg_lease_rate",
    "1_bedrooms_leased",
    "1_bed_room_avg_lease_rate",
    "2_bedrooms_leased",
    "2_bedrooms_avg_lease_rate",
    "3_bedrooms_leased",
    "3_bedrooms_avg_lease_rate",
    "area_sq_meters",
    "perimeter_meters",
    "park_count",
    "ASSAULT_2019",
    "ASSAULT_RATE_2019",
    "AUTOTHEFT_2019",
    "AUTOTHEFT_RATE_2019",
    "BIKETHEFT_RATE_2019",
    "BREAKENTER_2019",
    "BIKETHEFT_RATE_2019_1",
    "HOMICIDE_2019",
    "HOMICIDE_RATE_2019",
    "ROBBERY_2019",
    "ROBBERY_RATE_2019",
    "SHOOTING_2019",
    "SHOOTING_RATE_2019",
    "THEFTFROMMV_2019",
    "THEFTFROMMV_RATE_2019",
    "THEFTOVER_2019",
    "THEFTOVER_RATE_2019",
    "POPULATION_2019",
    "total_stop_count",
    "avg_stop_frequency",
    "max_stop_frequency",
    "total_line_length_meters",
    "transit_line_density",
    "distinct_route_count",
]

MANUAL_JOIN_FILL_ZERO = {
    "Bachelor Leased",
    "bachelor_avg_lease_rate",
    "1_bedrooms_leased",
    "1_bed_room_avg_lease_rate",
    "2_bedrooms_leased",
    "2_bedrooms_avg_lease_rate",
    "3_bedrooms_leased",
    "3_bedrooms_avg_lease_rate",
}

df_2019 = pd.DataFrame(index=df_og.index)

missing_from_og = []
for col in REQUIRED_COLS_2019:
    if col in MANUAL_JOIN_FILL_ZERO:
        # Force these to 0, even if df_og has values/NaNs
        df_2019[col] = 0
        continue

    if col in df_og.columns:
        df_2019[col] = df_og[col]
    else:
        missing_from_og.append(col)

        # Any 2019-coded cols missing -> create as 0 (including POPULATION_2019)
        if "_2019" in col:
            df_2019[col] = 0
        else:
            df_2019[col] = np.nan

# Enforce exact order
df_2019 = df_2019[REQUIRED_COLS_2019]

print("df_2019 shape:", df_2019.shape)
if missing_from_og:
    print("Missing in df_og (created in df_2019):")
    for c in missing_from_og:
        print(" -", c)

# Quick verify the forced-zero cols
print("Manual-join cols unique sample:")

df_2019.head()

df_2019 shape: (158, 41)
Missing in df_og (created in df_2019):
 - POPULATION_2019
Manual-join cols unique sample:


Unnamed: 0,AREA_NAME,CLASSIFICATION,CLASSIFICATION_CODE,geometry_wkt,geometry_type,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,2_bedrooms_leased,2_bedrooms_avg_lease_rate,3_bedrooms_leased,3_bedrooms_avg_lease_rate,area_sq_meters,perimeter_meters,park_count,ASSAULT_2019,ASSAULT_RATE_2019,AUTOTHEFT_2019,AUTOTHEFT_RATE_2019,BIKETHEFT_RATE_2019,BREAKENTER_2019,BIKETHEFT_RATE_2019_1,HOMICIDE_2019,HOMICIDE_RATE_2019,ROBBERY_2019,ROBBERY_RATE_2019,SHOOTING_2019,SHOOTING_RATE_2019,THEFTFROMMV_2019,THEFTFROMMV_RATE_2019,THEFTOVER_2019,THEFTOVER_RATE_2019,POPULATION_2019,total_stop_count,avg_stop_frequency,max_stop_frequency,total_line_length_meters,transit_line_density,distinct_route_count
0,Keelesdale-Eglinton West,Neighbourhood Improvement Area,NIA,MULTIPOLYGON (((-79.4620964545852 43.685717567...,MultiPolygon,Toronto W03,0,0,0,0,0,0,0,0,0.000196,0.064099,6,57,488.683136,33,282.921814,17.146776,18,154.320984,1.0,8.573388,11,94.307274,,,28,240.054871,8.0,68.587105,0,30,537.266667,1321,1.018065,1.017866,11
1,Bridle Path-Sunnybrook-York Mills,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3783755029958 43.744291877...,MultiPolygon,Toronto C12,0,0,0,0,0,0,0,0,0.000988,0.169672,15,44,442.745026,11,110.686256,60.374321,53,533.306519,,,13,130.811035,3.0,30.18716,25,251.559677,3.0,30.18716,0,73,265.60274,911,1.247435,1.246204,14
2,Birchcliffe-Cliffside,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.2467146164104 43.698979955...,MultiPolygon,Toronto E06,0,0,0,0,0,0,0,0,0.000671,0.150559,20,209,890.65033,26,110.798599,21.307423,57,242.904633,,,32,136.367508,2.0,8.522969,93,396.318085,7.0,29.830393,0,101,282.693069,790,1.291023,1.290157,13
3,Clanton Park,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.4369481337577 43.749120943...,MultiPolygon,Toronto C06,0,0,0,0,0,0,0,0,0.000466,0.11026,11,87,488.791504,43,241.586609,50.56464,44,247.204895,,,12,67.419518,2.0,11.236587,68,382.043945,4.0,22.473173,0,63,404.031746,1541,2.894569,2.893222,19
4,North Toronto,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3974366551459 43.706929916...,MultiPolygon,Toronto C04,0,0,0,0,0,0,0,0,4.5e-05,0.027277,3,84,573.809692,4,27.32427,150.283493,36,245.918442,1.0,6.831068,19,129.790283,1.0,6.831068,18,122.959221,4.0,27.32427,0,9,532.888889,1692,0.085268,0.085265,2


In [313]:
"""    
Manually Creating Cols For: 

'Bachelor Leased', 
    'bachelor_avg_lease_rate', 
    '1_bedrooms_leased',
    '1_bed_room_avg_lease_rate', 
    '2_bedrooms_leased', 
    '2_bedrooms_avg_lease_rate', 
    '3_bedrooms_leased', 
    '3_bedrooms_avg_lease_rate'

8 colums x 4 quarters 

"""


data_q1 = {

    "Area": [
        "W01", "W02", "W03", "W04", "W05", "W06", "W07", "W08", "W09", "W10",
        "C01", "C02", "C03", "C04", "C06", "C07", "C08", "C09", "C10", "C11",
        "C12", "C13", "C14", "C15", "E01", "E02", "E03", "E04", "E05", "E06",
        "E07", "E08", "E09", "E10", "E11"
    ],
    "bachelor_leased_q1": [
        1, 0, 0, 2, 0, 2, 0, 1, 0, 0,
        154, 7, 1, 0, 0, 0, 71, 0, 6, 0,
        0, 0, 6, 3, 1, 1, 0, 0, 0, 0,
        0, 0, 3, 0, 0
    ],
    "bachelor_avg_lease_rate_q1": [
        1600, None, None, 1700, None, 1600, None, 1800, None, None,
        1831, 1981, 1725, None, None, None, 1828, None, 1713, None,
        None, None, 1771, 1767, 1900, 1550, None, None, None, None,
        None, None, 1617, None, None
    ],
    "1_bedrooms_leased_q1": [
        31, 42, 4, 54, 19, 169, 0, 87, 2, 10,
        1247, 57, 52, 7, 12, 85, 635, 14, 124, 18,
        6, 37, 179, 200, 20, 6, 1, 18, 12, 3,
        20, 4, 57, 1, 5
    ],
    "1_bed_room_avg_lease_rate_q1": [
        2234, 2068, 1975, 1879, 1874, 2069, None, 1967, 1825, 1871,
        2326, 2564, 2072, 2117, 1900, 2126, 2156, 2393, 2143, 1866,
        2329, 2004, 2127, 2042, 2340, 2097, 1800, 1765, 1893, 1980,
        1759, 1619, 1934, 1250, 1610
    ],
    "2_bedrooms_leased_q1": [
        15, 22, 1, 35, 11, 109, 1, 54, 4, 10,
        554, 57, 8, 10, 14, 86, 311, 12, 66, 16,
        7, 22, 140, 101, 8, 6, 5, 6, 18, 0,
        23, 6, 40, 2, 9
    ],
    "2_bedrooms_avg_lease_rate_q1": [
        3257, 3084, 2300, 2316, 2209, 2891, 2100, 2394, 2436, 2098,
        3304, 4684, 2763, 2990, 2346, 2778, 2957, 3067, 2886, 2327,
        3164, 2534, 2710, 2525, 3062, 3067, 2110, 2170, 2244, None,
        2092, 2016, 2365, 1938, 1953
    ],
    "3_bedrooms_leased_q1": [
        0, 1, 0, 1, 1, 1, 0, 6, 1, 0,
        35, 3, 0, 2, 1, 7, 12, 0, 5, 2,
        0, 0, 6, 2, 0, 0, 0, 2, 1, 0,
        2, 1, 4, 0, 1
    ],
    "3_bedrooms_avg_lease_rate_q1": [
        None, 3500, None, 3100, 2200, 7500, None, 2624, 2549, None,
        5205, 4967, None, 2875, 3300, 2764, 3838, None, 4375, 2400,
        None, None, 3349, 3950, None, None, None, 2365, 2500, None,
        2150, 2300, 2763, None, 2000
    ]
}

In [314]:
"""    
Manually Creating Cols For: 

'Bachelor Leased', 
    'bachelor_avg_lease_rate', 
    '1_bedrooms_leased',
    '1_bed_room_avg_lease_rate', 
    '2_bedrooms_leased', 
    '2_bedrooms_avg_lease_rate', 
    '3_bedrooms_leased', 
    '3_bedrooms_avg_lease_rate'

8 colums x 4 quarters 

"""


data_q2 = {
    "Area": [
        "W01", "W02", "W03", "W04", "W05", "W06", "W07", "W08", "W09", "W10",
        "C01", "C02", "C03", "C04", "C06", "C07", "C08", "C09", "C10", "C11",
        "C12", "C13", "C14", "C15", "E01", "E02", "E03", "E04", "E05", "E06",
        "E07", "E08", "E09", "E10", "E11"
    ],
    "bachelor_leased_q2": [
        3, 2, 0, 2, 3, 1, 0, 1, 0, 0,
        204, 12, 2, 0, 0, 1, 114, 0, 26, 0,
        0, 1, 7, 15, 2, 0, 0, 0, 0, 0,
        0, 0, 4, 0, 0
    ],
    "bachelor_avg_lease_rate_q2": [
        1800, 1750, None, 1763, 1467, 1750, None, 1800, None, None,
        1865, 1833, 1548, None, None, 2250, 1832, None, 1674, None,
        None, 1700, 1732, 1746, 1772, None, None, None, None, None,
        None, None, 1600, None, None
    ],
    "1_bedrooms_leased_q2": [
        40, 36, 0, 56, 27, 232, 5, 108, 3, 16,
        1723, 99, 74, 17, 18, 178, 837, 19, 333, 19,
        8, 51, 280, 363, 42, 18, 4, 9, 19, 5,
        35, 3, 75, 1, 5
    ],
    "1_bed_room_avg_lease_rate_q2": [
        2170, 2235, None, 1874, 1939, 2119, 2090, 2054, 1800, 1929,
        2359, 2514, 2103, 2461, 1969, 2169, 2275, 2426, 2125, 1913,
        2444, 2021, 2128, 2038, 2257, 2153, 2112, 1811, 1941, 1820,
        1880, 1800, 1957, 1550, 1755
    ],
    "2_bedrooms_leased_q2": [
        17, 21, 4, 45, 13, 131, 4, 71, 5, 15,
        771, 77, 44, 20, 14, 177, 472, 30, 116, 17,
        8, 36, 214, 153, 21, 10, 3, 13, 25, 7,
        31, 10, 68, 1, 6
    ],
    "2_bedrooms_avg_lease_rate_q2": [
        2816, 2745, 2531, 2353, 2408, 2786, 2571, 2599, 2370, 2209,
        3363, 4474, 2776, 3010, 2378, 2680, 3075, 3547, 2967, 2489,
        3523, 2540, 2710, 2539, 2783, 2745, 2100, 2079, 2237, 2450,
        2200, 2105, 2454, 1850, 2125
    ],
    "3_bedrooms_leased_q2": [
        3, 1, 3, 8, 0, 3, 2, 6, 2, 0,
        70, 1, 1, 4, 0, 14, 16, 6, 2, 6,
        0, 5, 16, 14, 0, 0, 0, 0, 5, 1,
        1, 2, 10, 0, 0
    ],
    "3_bedrooms_avg_lease_rate_q2": [
        3500, 2850, 2667, 2525, None, 4200, 3675, 2700, 2518, None,
        4888, 9200, 4250, 2988, None, 2904, 3883, 5779, 3150, 2615,
        None, 2489, 3469, 3256, None, None, None, None, 2310, 2450,
        2400, 2350, 2975, None, None
    ]
}



In [315]:
"""    
Manually Creating Cols For: 

'Bachelor Leased', 
    'bachelor_avg_lease_rate', 
    '1_bedrooms_leased',
    '1_bed_room_avg_lease_rate', 
    '2_bedrooms_leased', 
    '2_bedrooms_avg_lease_rate', 
    '3_bedrooms_leased', 
    '3_bedrooms_avg_lease_rate'

8 colums x 4 quarters 

"""

data_q3 = {
    "Area": [
        "W01", "W02", "W03", "W04", "W05", "W06", "W07", "W08", "W09", "W10",
        "C01", "C02", "C03", "C04", "C06", "C07", "C08", "C09", "C10", "C11",
        "C12", "C13", "C14", "C15", "E01", "E02", "E03", "E04", "E05", "E06",
        "E07", "E08", "E09", "E10", "E11"
    ],
    "bachelor_leased_q3": [
        1, 2, 1, 2, 4, 2, 0, 1, 0, 0,
        213, 17, 4, 3, 0, 1, 161, 0, 5, 0,
        0, 1, 11, 19, 3, 0, 0, 0, 0, 1,
        0, 0, 9, 0, 0
    ],
    "bachelor_avg_lease_rate_q3": [
        2100, 1875, 1300, 1650, 1413, 1725, None, 1700, None, None,
        1958, 1957, 1562, 1692, None, 1660, 1952, None, 1850, None,
        None, 1800, 1823, 1752, 2300, None, None, None, None, 1450,
        None, None, 1742, None, None
    ],
    "1_bedrooms_leased_q3": [
        36, 41, 2, 48, 19, 241, 5, 128, 1, 23,
        2015, 126, 40, 15, 16, 162, 960, 17, 183, 30,
        8, 43, 390, 469, 87, 15, 3, 11, 6, 13,
        46, 7, 113, 0, 2
    ],
    "1_bed_room_avg_lease_rate_q3": [
        2276, 2168, 1865, 2003, 2083, 2175, 2330, 2098, 1950, 1908,
        2417, 2590, 2116, 2236, 2058, 2265, 2378, 2802, 2243, 1948,
        2453, 2084, 2191, 2110, 2267, 2222, 1950, 1895, 2033, 1913,
        1907, 1923, 2021, None, 1923
    ],
    "2_bedrooms_leased_q3": [
        21, 18, 0, 26, 15, 163, 2, 71, 6, 16,
        842, 80, 41, 11, 15, 173, 408, 28, 109, 18,
        10, 41, 346, 203, 39, 15, 7, 11, 31, 4,
        43, 12, 80, 1, 6
    ],
      "2_bedrooms_avg_lease_rate_q3": [
        3028, 3065, None, 2442, 2383, 2832, 2500, 2584, 2408, 2242,
        3443, 4424, 2985, 3445, 2460, 2818, 3218, 3617, 3016, 2421,
        3250, 2560, 2792, 2631, 2838, 2787, 2443, 2223, 2316, 2300,
        2309, 2181, 2591, 2100, 2200
    ],
    "3_bedrooms_leased_q3": [
        1, 1, 2, 4, 0, 7, 0, 4, 0, 0,
        63, 1, 7, 3, 1, 21, 22, 0, 2, 0,
        0, 4, 10, 11, 0, 0, 1, 1, 6, 0,
        4, 1, 10, 0, 1
    ],
    "3_bedrooms_avg_lease_rate_q3": [
        3300, 2900, 2800, 2840, None, 4507, None, 2863, None, None,
        4788, 6050, 4979, 3940, 3300, 3224, 4466, None, 4225, None,
        None, 2950, 3620, 3776, None, None, 2450, 2400, 2542, None,
        2289, 2450, 3010, None, 2400
    ]
}





In [316]:
"""    
Manually Creating Cols For: 

'Bachelor Leased', 
    'bachelor_avg_lease_rate', 
    '1_bedrooms_leased',
    '1_bed_room_avg_lease_rate', 
    '2_bedrooms_leased', 
    '2_bedrooms_avg_lease_rate', 
    '3_bedrooms_leased', 
    '3_bedrooms_avg_lease_rate'

8 colums x 4 quarters 

"""

data_q4 = {
        "Area": [
        "W01", "W02", "W03", "W04", "W05", "W06", "W07", "W08", "W09", "W10",
        "C01", "C02", "C03", "C04", "C06", "C07", "C08", "C09", "C10", "C11",
        "C12", "C13", "C14", "C15", "E01", "E02", "E03", "E04", "E05", "E06",
        "E07", "E08", "E09", "E10", "E11"
    ],
    "bachelor_leased_q4": [
        1, 2, 0, 1, 2, 3, 0, 0, 0, 0,
        128, 7, 3, 0, 0, 0, 72, 0, 3, 0,
        0, 2, 6, 10, 2, 0, 0, 0, 0, 1,
        0, 1, 3, 0, 0
    ],
    "bachelor_avg_lease_rate_q4": [
        1850, 1700, None, 1800, 1500, 1700, None, None, None, None,
        1909, 1921, 1583, None, None, None, 1913, None, 1850, None,
        None, 1813, 1852, 1790, 1775, None, None, None, None, 1325,
        None, 1600, 1627, None, None
    ],
    "1_bedrooms_leased_q4": [
        46, 37, 6, 29, 21, 162, 1, 82, 3, 22,
        1262, 86, 28, 12, 22, 95, 518, 17, 142, 11,
        5, 30, 217, 269, 94, 9, 4, 15, 16, 5,
        27, 5, 73, 0, 6
    ],
    "1_bed_room_avg_lease_rate_q4": [
        2266, 2231, 2042, 2003, 2045, 2184, 1850, 2112, 2067, 2022,
        2343, 2504, 2150, 2097, 2051, 2215, 2311, 2835, 2196, 1921,
        2550, 2066, 2186, 2108, 2113, 2177, 2019, 1888, 1875, 1925,
        1868, 1829, 1971, None, 1772
    ],
    "2_bedrooms_leased_q4": [
        14, 12, 1, 19, 14, 128, 3, 59, 6, 6,
        454, 43, 25, 14, 13, 101, 182, 12, 58, 14,
        1, 30, 170, 95, 50, 8, 3, 10, 16, 7,
        32, 5, 47, 1, 9
    ],
    "2_bedrooms_avg_lease_rate_q4": [
        3462, 2866, 2400, 2424, 2309, 2794, 2450, 2736, 2507, 2350,
        3306, 5900, 2910, 3246, 2453, 2759, 3151, 4066, 2982, 2484,
        2850, 2478, 2734, 2642, 2775, 3106, 2420, 2150, 2311, 2529,
        2185, 2265, 2345, 2100, 2167
    ],
    "3_bedrooms_leased_q4": [
        3, 4, 1, 3, 1, 2, 0, 1, 1, 0,
        28, 1, 1, 1, 2, 17, 5, 1, 2, 2,
        0, 2, 7, 4, 2, 1, 0, 1, 5, 0,
        1, 1, 6, 3, 0
    ],
    "3_bedrooms_avg_lease_rate_q4": [
        4917, 3900, 2700, 2517, 2600, 3225, None, 2600, 2200, None,
        4358, 5000, 3600, 2450, 3225, 3174, 4110, 7155, 3900, 2475,
        None, 2700, 3386, 2813, 3550, 4950, None, 2400, 2440, None,
        2400, 2200, 2653, 2067, None
    ]
}

 

In [317]:
manual_cols = [
    "Bachelor Leased",
    "bachelor_avg_lease_rate",
    "1_bedrooms_leased",
    "1_bed_room_avg_lease_rate",
    "2_bedrooms_leased",
    "2_bedrooms_avg_lease_rate",
    "3_bedrooms_leased",
    "3_bedrooms_avg_lease_rate",
]

missing = [c for c in manual_cols if c not in rental_avg.columns]
missing, rental_avg.columns.tolist()

([],
 ['Area',
  'Bachelor Leased',
  'bachelor_avg_lease_rate',
  '1_bedrooms_leased',
  '1_bed_room_avg_lease_rate',
  '2_bedrooms_leased',
  '2_bedrooms_avg_lease_rate',
  '3_bedrooms_leased',
  '3_bedrooms_avg_lease_rate',
  '_area_code'])

In [None]:
import pandas as pd

# Build a merge key that matches between df_2019 ("Toronto W03") and rental_avg ("W03")
df_2019["Area"] = df_2019["Area"].astype(str).str.strip()
rental_avg["Area"] = rental_avg["Area"].astype(str).str.strip()

df_2019["_area_code"] = df_2019["Area"].str.extract(r"([WCE]\d{2})", expand=False)
rental_avg["_area_code"] = rental_avg["Area"].str.extract(r"([WCE]\d{2})", expand=False)

df_2019 = df_2019.merge(
    rental_avg[["_area_code"] + manual_cols],
    on="_area_code",
    how="left",
    sort=False,
    suffixes=("", "_rental"),
)

for c in manual_cols:
    df_2019[c] = df_2019[c].mask(df_2019[c].eq(0), df_2019[f"{c}_rental"])
    df_2019.drop(columns=[f"{c}_rental"], inplace=True)

df_2019.drop(columns=["_area_code"], inplace=True)

df_2019[manual_cols] = df_2019[manual_cols].apply(pd.to_numeric, errors="coerce").fillna(0)

df_2019.head()

Unnamed: 0,AREA_NAME,CLASSIFICATION,CLASSIFICATION_CODE,geometry_wkt,geometry_type,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,2_bedrooms_leased,2_bedrooms_avg_lease_rate,3_bedrooms_leased,3_bedrooms_avg_lease_rate,area_sq_meters,perimeter_meters,park_count,ASSAULT_2019,ASSAULT_RATE_2019,AUTOTHEFT_2019,AUTOTHEFT_RATE_2019,BIKETHEFT_RATE_2019,BREAKENTER_2019,BIKETHEFT_RATE_2019_1,HOMICIDE_2019,HOMICIDE_RATE_2019,ROBBERY_2019,ROBBERY_RATE_2019,SHOOTING_2019,SHOOTING_RATE_2019,THEFTFROMMV_2019,THEFTFROMMV_RATE_2019,THEFTOVER_2019,THEFTOVER_RATE_2019,POPULATION_2019,total_stop_count,avg_stop_frequency,max_stop_frequency,total_line_length_meters,transit_line_density,distinct_route_count
0,Keelesdale-Eglinton West,Neighbourhood Improvement Area,NIA,MULTIPOLYGON (((-79.4620964545852 43.685717567...,MultiPolygon,Toronto W03,0.25,1300.0,3.0,1960.666667,1.5,2410.333333,1.5,2722.333333,0.000196,0.064099,6,57,488.683136,33,282.921814,17.146776,18,154.320984,1.0,8.573388,11,94.307274,,,28,240.054871,8.0,68.587105,0,30,537.266667,1321,1.018065,1.017866,11
1,Bridle Path-Sunnybrook-York Mills,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3783755029958 43.744291877...,MultiPolygon,Toronto C12,0.0,0.0,6.75,2444.0,6.5,3196.75,0.0,0.0,0.000988,0.169672,15,44,442.745026,11,110.686256,60.374321,53,533.306519,,,13,130.811035,3.0,30.18716,25,251.559677,3.0,30.18716,0,73,265.60274,911,1.247435,1.246204,14
2,Birchcliffe-Cliffside,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.2467146164104 43.698979955...,MultiPolygon,Toronto E06,0.5,1387.5,6.5,1909.5,4.5,2426.333333,0.25,2450.0,0.000671,0.150559,20,209,890.65033,26,110.798599,21.307423,57,242.904633,,,32,136.367508,2.0,8.522969,93,396.318085,7.0,29.830393,0,101,282.693069,790,1.291023,1.290157,13
3,Clanton Park,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.4369481337577 43.749120943...,MultiPolygon,Toronto C06,0.0,0.0,17.0,1994.5,14.0,2409.25,1.0,3275.0,0.000466,0.11026,11,87,488.791504,43,241.586609,50.56464,44,247.204895,,,12,67.419518,2.0,11.236587,68,382.043945,4.0,22.473173,0,63,404.031746,1541,2.894569,2.893222,19
4,North Toronto,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3974366551459 43.706929916...,MultiPolygon,Toronto C04,0.75,1692.0,12.75,2227.75,13.75,3172.75,2.5,3063.25,4.5e-05,0.027277,3,84,573.809692,4,27.32427,150.283493,36,245.918442,1.0,6.831068,19,129.790283,1.0,6.831068,18,122.959221,4.0,27.32427,0,9,532.888889,1692,0.085268,0.085265,2


In [324]:
df_2019.to_csv('../sets/2019_dataframe.csv', index=False)