In [10]:
import pandas as pd
import numpy as np

# ---------------------------------------------------------------------------
# Inputs
# ---------------------------------------------------------------------------
# df_og   : neighbourhood-level source table the 2019 frame is carved out of.
# df_2020 : loaded here but not referenced anywhere else in this cell.
df_og = pd.read_csv('../data/raw_data/tor_neighborhood_condorental.csv')
df_2020 = pd.read_csv('../data/raw_data/2020.csv')

# ---------------------------------------------------------------------------
# Column groups
# ---------------------------------------------------------------------------
# The year-independent columns are declared as three runs because the final
# layout interleaves them with the rental and crime columns.
identity_cols = [
    'AREA_NAME',
    'CLASSIFICATION',
    'CLASSIFICATION_CODE',
    'geometry_wkt',
    'geometry_type',
    'Area',
]
footprint_cols = [
    'area_sq_meters',
    'perimeter_meters',
    'park_count',
]
transit_cols = [
    'total_stop_count',
    'avg_stop_frequency',
    'max_stop_frequency',
    'total_line_length_meters',
    'transit_line_density',
    'distinct_route_count',
]
static_cols = identity_cols + footprint_cols + transit_cols

# 2019 crime columns, selected from df_og by these exact names.
# NOTE(review): 'BIKETHEFT_RATE_2019' is followed by 'BREAKENTER_2019' and
# 'BIKETHEFT_RATE_2019_1', and there is no 'BIKETHEFT_2019' or
# 'BREAKENTER_RATE_2019' - this looks like mislabelled headers in the source
# file; confirm which column holds which measure.
year_specific_cols_2019 = [
    'ASSAULT_2019',
    'ASSAULT_RATE_2019',
    'AUTOTHEFT_2019',
    'AUTOTHEFT_RATE_2019',
    'BIKETHEFT_RATE_2019',
    'BREAKENTER_2019',
    'BIKETHEFT_RATE_2019_1',
    'HOMICIDE_2019',
    'HOMICIDE_RATE_2019',
    'ROBBERY_2019',
    'ROBBERY_RATE_2019',
    'SHOOTING_2019',
    'SHOOTING_RATE_2019',
    'THEFTFROMMV_2019',
    'THEFTFROMMV_RATE_2019',
    'THEFTOVER_2019',
    'THEFTOVER_RATE_2019',
]

# Rental columns that are not copied from df_og; they are created empty here.
rental_cols_to_create = [
    'Bachelor Leased',
    'bachelor_avg_lease_rate',
    '1_bedrooms_leased',
    '1_bed_room_avg_lease_rate',
    '2_bedrooms_leased',
    '2_bedrooms_avg_lease_rate',
    '3_bedrooms_leased',
    '3_bedrooms_avg_lease_rate',
]

# Output layout: identity, rental, footprint, crime, population, transit.
final_col_order = (
    identity_cols
    + rental_cols_to_create
    + footprint_cols
    + year_specific_cols_2019
    + ['POPULATION_2019']
    + transit_cols
)

# ---------------------------------------------------------------------------
# Build the 2019 skeleton
# ---------------------------------------------------------------------------
cols_from_og = static_cols + year_specific_cols_2019

# POPULATION_2019 is filled with the constant 0 and every rental column with
# NaN; the frame is then put into the final column layout.
df_2019 = (
    df_og.loc[:, cols_from_og]
    .assign(POPULATION_2019=0, **dict.fromkeys(rental_cols_to_create, np.nan))
    .loc[:, final_col_order]
)

df_2019.head()

Unnamed: 0,AREA_NAME,CLASSIFICATION,CLASSIFICATION_CODE,geometry_wkt,geometry_type,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,...,THEFTFROMMV_RATE_2019,THEFTOVER_2019,THEFTOVER_RATE_2019,POPULATION_2019,total_stop_count,avg_stop_frequency,max_stop_frequency,total_line_length_meters,transit_line_density,distinct_route_count
0,Keelesdale-Eglinton West,Neighbourhood Improvement Area,NIA,MULTIPOLYGON (((-79.4620964545852 43.685717567...,MultiPolygon,Toronto W03,,,,,...,240.054871,8.0,68.587105,0,30,537.266667,1321,1.018065,1.017866,11
1,Bridle Path-Sunnybrook-York Mills,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3783755029958 43.744291877...,MultiPolygon,Toronto C12,,,,,...,251.559677,3.0,30.18716,0,73,265.60274,911,1.247435,1.246204,14
2,Birchcliffe-Cliffside,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.2467146164104 43.698979955...,MultiPolygon,Toronto E06,,,,,...,396.318085,7.0,29.830393,0,101,282.693069,790,1.291023,1.290157,13
3,Clanton Park,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.4369481337577 43.749120943...,MultiPolygon,Toronto C06,,,,,...,382.043945,4.0,22.473173,0,63,404.031746,1541,2.894569,2.893222,19
4,North Toronto,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3974366551459 43.706929916...,MultiPolygon,Toronto C04,,,,,...,122.959221,4.0,27.32427,0,9,532.888889,1692,0.085268,0.085265,2


In [11]:
import pandas as pd
import numpy as np

# --- Create DataFrame for 2019 Q1 with Standardized Column Names ---
#
# Hard-coded quarterly rental table, one entry per district code in "Area".
# Every list is positionally aligned with "Area" and holds 35 values:
# 10 West codes (W01-W10), 14 Central codes (C01-C04 and C06-C15; there is no
# C05 entry) and 11 East codes (E01-E11). The value lists in this cell are
# written as three rows in that West / Central / East order.
# Each unit type has a leased-count column and an average-lease-rate column;
# np.nan marks a rate that is not available (e.g. the matching count is 0).
# TODO(review): record which report these figures were transcribed from.

q1_data_dict = {
    "Area": [
        "Toronto W01", "Toronto W02", "Toronto W03", "Toronto W04", "Toronto W05",
        "Toronto W06", "Toronto W07", "Toronto W08", "Toronto W09", "Toronto W10",
        "Toronto C01", "Toronto C02", "Toronto C03", "Toronto C04", "Toronto C06",
        "Toronto C07", "Toronto C08", "Toronto C09", "Toronto C10", "Toronto C11",
        "Toronto C12", "Toronto C13", "Toronto C14", "Toronto C15", 
        "Toronto E01", "Toronto E02", "Toronto E03", "Toronto E04", "Toronto E05", 
        "Toronto E06", "Toronto E07", "Toronto E08", "Toronto E09", "Toronto E10", 
        "Toronto E11",
    ],
    "Bachelor Leased": [
        1, 0, 0, 2, 0, 2, 0, 1, 0, 0,  # W01-W10
        154, 7, 1, 0, 0, 0, 71, 0, 6, 0, 0, 0, 6, 3,  # C01-C04, C06-C15
        1, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0,  # E01-E11
    ],
    "bachelor_avg_lease_rate": [
        1600, np.nan, np.nan, 1700, np.nan, 1600, np.nan, 1800, np.nan, np.nan,
        1831, 1981, 1725, np.nan, np.nan, np.nan, 1828, np.nan, 1713, np.nan, np.nan, np.nan, 1771, 1767,
        1900, 1550, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 1617, np.nan, np.nan,
    ],
    "1_bedrooms_leased": [
        31, 42, 4, 54, 19, 169, 0, 87, 2, 10,
        1247, 57, 52, 7, 12, 85, 635, 14, 124, 18, 6, 37, 179, 200,
        20, 6, 1, 18, 12, 3, 20, 4, 57, 1, 5,
    ],
    "1_bed_room_avg_lease_rate": [
        2234, 2068, 1975, 1879, 1874, 2069, np.nan, 1967, 1825, 1871,
        2326, 2564, 2072, 2117, 1900, 2126, 2156, 2393, 2143, 1866, 2329, 2004, 2127, 2042,
        2340, 2097, 1800, 1765, 1893, 1980, 1759, 1619, 1934, 1250, 1610,
    ],
    "2_bedrooms_leased": [
        15, 22, 1, 35, 11, 109, 1, 54, 4, 10,
        554, 57, 8, 10, 14, 86, 311, 12, 66, 16, 7, 22, 140, 101,
        8, 6, 5, 6, 18, 0, 23, 6, 40, 2, 9,
    ],
    "2_bedrooms_avg_lease_rate": [
        3257, 3084, 2300, 2316, 2209, 2891, 2100, 2394, 2436, 2098,
        3304, 4684, 2763, 2990, 2346, 2778, 2957, 3067, 2886, 2327, 3164, 2534, 2710, 2525,
        3062, 3067, 2110, 2170, 2244, np.nan, 2092, 2016, 2365, 1938, 1953,
    ],
    "3_bedrooms_leased": [
        0, 1, 0, 1, 1, 1, 0, 6, 1, 0,
        35, 3, 0, 2, 1, 7, 12, 0, 5, 2, 0, 0, 6, 2,
        0, 0, 0, 2, 1, 0, 2, 1, 4, 0, 1,
    ],
    "3_bedrooms_avg_lease_rate": [
        np.nan, 3500, np.nan, 3100, 2200, 7500, np.nan, 2624, 2549, np.nan,
        5205, 4967, np.nan, 2875, 3300, 2764, 3838, np.nan, 4375, 2400, np.nan, np.nan, 3349, 3950,
        np.nan, np.nan, np.nan, 2365, 2500, np.nan, 2150, 2300, 2763, np.nan, 2000,
    ],
}

# Create the DataFrame from the dictionary
# (pandas raises if the lists above are not all the same length).
df_q1 = pd.DataFrame(q1_data_dict)

# Preview the first rows as the cell output.
df_q1.head()

Unnamed: 0,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,2_bedrooms_leased,2_bedrooms_avg_lease_rate,3_bedrooms_leased,3_bedrooms_avg_lease_rate
0,Toronto W01,1,1600.0,31,2234.0,15,3257.0,0,
1,Toronto W02,0,,42,2068.0,22,3084.0,1,3500.0
2,Toronto W03,0,,4,1975.0,1,2300.0,0,
3,Toronto W04,2,1700.0,54,1879.0,35,2316.0,1,3100.0
4,Toronto W05,0,,19,1874.0,11,2209.0,1,2200.0


In [12]:

import pandas as pd
import numpy as np


# --- Q2 quarterly rental table (same column names as the Q1/Q3/Q4 tables) ---
# NOTE(review): presumably 2019 Q2, since df_q2 is later combined with the
# other quarters into the 2019 annual figures - confirm.
#
# Hard-coded table, one entry per district code in "Area". Every list is
# positionally aligned with "Area" and holds 35 values: 10 West codes
# (W01-W10), 14 Central codes (C01-C04 and C06-C15; there is no C05 entry)
# and 11 East codes (E01-E11).
# Layout differs between lists in this cell: the leased-count lists are
# written as West / Central / East rows (10 / 14 / 11 values), while the
# rate lists are written as rows of 10, 10, 10 and 5 values.
# np.nan marks a rate that is not available (e.g. the matching count is 0).
# TODO(review): record which report these figures were transcribed from.
q2_data_dict = {
    "Area": [
        "Toronto W01", "Toronto W02", "Toronto W03", "Toronto W04", "Toronto W05",
        "Toronto W06", "Toronto W07", "Toronto W08", "Toronto W09", "Toronto W10",
        "Toronto C01", "Toronto C02", "Toronto C03", "Toronto C04", "Toronto C06",
        "Toronto C07", "Toronto C08", "Toronto C09", "Toronto C10", "Toronto C11",
        "Toronto C12", "Toronto C13", "Toronto C14", "Toronto C15", "Toronto E01",
        "Toronto E02", "Toronto E03", "Toronto E04", "Toronto E05", "Toronto E06",
        "Toronto E07", "Toronto E08", "Toronto E09", "Toronto E10", "Toronto E11",
    ],
    "Bachelor Leased": [
        3, 2, 0, 2, 3, 1, 0, 1, 0, 0,
        204, 12, 2, 0, 0, 1, 114, 0, 26, 0, 0, 1, 7, 15,
        2, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0,
    ],
    "bachelor_avg_lease_rate": [
        1800, 1750, np.nan, 1763, 1467, 1750, np.nan, 1800, np.nan, np.nan,
        1865, 1833, 1548, np.nan, np.nan, 2250, 1832, np.nan, 1674, np.nan,
        np.nan, 1700, 1732, 1746, 1772, np.nan, np.nan, np.nan, np.nan, np.nan,
        np.nan, np.nan, 1600, np.nan, np.nan,
    ],
    "1_bedrooms_leased": [
        40, 36, 0, 56, 27, 232, 5, 108, 3, 16,
        1723, 99, 74, 17, 18, 178, 837, 19, 333, 19, 8, 51, 280, 363,
        42, 18, 4, 9, 19, 5, 35, 3, 75, 1, 5,
    ],
    "1_bed_room_avg_lease_rate": [
        2170, 2235, np.nan, 1874, 1939, 2119, 2090, 2054, 1800, 1929,
        2359, 2514, 2103, 2461, 1969, 2169, 2275, 2426, 2125, 1913,
        2444, 2021, 2128, 2038, 2257, 2153, 2112, 1811, 1941, 1820,
        1880, 1800, 1957, 1550, 1755,
    ],
    "2_bedrooms_leased": [
        17, 21, 4, 45, 13, 131, 4, 71, 5, 15,
        771, 77, 44, 20, 14, 177, 472, 30, 116, 17, 8, 36, 214, 153,
        21, 10, 3, 13, 25, 7, 31, 10, 68, 1, 6,
    ],
    "2_bedrooms_avg_lease_rate": [
        2816, 2745, 2531, 2353, 2408, 2786, 2571, 2599, 2370, 2209,
        3363, 4474, 2776, 3010, 2378, 2680, 3075, 3547, 2967, 2489,
        3523, 2540, 2710, 2539, 2783, 2745, 2100, 2079, 2237, 2450,
        2200, 2105, 2454, 1850, 2125,
    ],
    "3_bedrooms_leased": [
        3, 1, 3, 8, 0, 3, 2, 6, 2, 0,
        70, 1, 1, 4, 0, 14, 16, 6, 2, 6, 0, 5, 16, 14,
        0, 0, 0, 0, 5, 1, 1, 2, 10, 0, 0,
    ],
    "3_bedrooms_avg_lease_rate": [
        3500, 2850, 2667, 2525, np.nan, 4200, 3675, 2700, 2518, np.nan,
        4888, 9200, 4250, 2988, np.nan, 2904, 3883, 5779, 3150, 2615,
        np.nan, 2489, 3469, 3256, np.nan, np.nan, np.nan, np.nan, 2310, 2450,
        2400, 2350, 2975, np.nan, np.nan,
    ],
}

# Build the frame (pandas raises if the lists are not all the same length).
df_q2 = pd.DataFrame(q2_data_dict)

# Preview the first rows as the cell output.
df_q2.head()


Unnamed: 0,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,2_bedrooms_leased,2_bedrooms_avg_lease_rate,3_bedrooms_leased,3_bedrooms_avg_lease_rate
0,Toronto W01,3,1800.0,40,2170.0,17,2816,3,3500.0
1,Toronto W02,2,1750.0,36,2235.0,21,2745,1,2850.0
2,Toronto W03,0,,0,,4,2531,3,2667.0
3,Toronto W04,2,1763.0,56,1874.0,45,2353,8,2525.0
4,Toronto W05,3,1467.0,27,1939.0,13,2408,0,


In [13]:

import pandas as pd
import numpy as np

# --- Create DataFrame for 2019 Q3 with Standardized Column Names ---
#
# Hard-coded quarterly rental table, one entry per district code in "Area".
# Every list is positionally aligned with "Area" and holds 35 values:
# 10 West codes (W01-W10), 14 Central codes (C01-C04 and C06-C15; there is no
# C05 entry) and 11 East codes (E01-E11). The value lists in this cell are
# written as three rows in that West / Central / East order.
# Each unit type has a leased-count column and an average-lease-rate column;
# np.nan marks a rate that is not available (e.g. the matching count is 0).
# TODO(review): record which report these figures were transcribed from.

q3_data_dict = {
    "Area": [
        "Toronto W01", "Toronto W02", "Toronto W03", "Toronto W04", "Toronto W05",
        "Toronto W06", "Toronto W07", "Toronto W08", "Toronto W09", "Toronto W10",
        "Toronto C01", "Toronto C02", "Toronto C03", "Toronto C04", "Toronto C06",
        "Toronto C07", "Toronto C08", "Toronto C09", "Toronto C10", "Toronto C11",
        "Toronto C12", "Toronto C13", "Toronto C14", "Toronto C15", "Toronto E01",
        "Toronto E02", "Toronto E03", "Toronto E04", "Toronto E05", "Toronto E06",
        "Toronto E07", "Toronto E08", "Toronto E09", "Toronto E10", "Toronto E11",
    ],
    "Bachelor Leased": [
        1, 2, 1, 2, 4, 2, 0, 1, 0, 0,  # W01-W10
        213, 17, 4, 3, 0, 1, 161, 0, 5, 0, 0, 1, 11, 19,  # C01-C04, C06-C15
        3, 0, 0, 0, 0, 1, 0, 0, 9, 0, 0,  # E01-E11
    ],
    "bachelor_avg_lease_rate": [
        2100, 1875, 1300, 1650, 1413, 1725, np.nan, 1700, np.nan, np.nan,
        1958, 1957, 1562, 1692, np.nan, 1660, 1952, np.nan, 1850, np.nan, np.nan, 1800, 1823, 1752,
        2300, np.nan, np.nan, np.nan, np.nan, 1450, np.nan, np.nan, 1742, np.nan, np.nan,
    ],
    "1_bedrooms_leased": [
        36, 41, 2, 48, 19, 241, 5, 128, 1, 23,
        2015, 126, 40, 15, 16, 162, 960, 17, 183, 30, 8, 43, 390, 469,
        87, 15, 3, 11, 6, 13, 46, 7, 113, 0, 2,
    ],
    "1_bed_room_avg_lease_rate": [
        2276, 2168, 1865, 2003, 2083, 2175, 2330, 2098, 1950, 1908,
        2417, 2590, 2116, 2236, 2058, 2265, 2378, 2802, 2243, 1948, 2453, 2084, 2191, 2110,
        2267, 2222, 1950, 1895, 2033, 1913, 1907, 1923, 2021, np.nan, 1923,
    ],
    "2_bedrooms_leased": [
        21, 18, 0, 26, 15, 163, 2, 71, 6, 16,
        842, 80, 41, 11, 15, 173, 408, 28, 109, 18, 10, 41, 346, 203,
        39, 15, 7, 11, 31, 4, 43, 12, 80, 1, 6,
    ],
    "2_bedrooms_avg_lease_rate": [
        3028, 3065, np.nan, 2442, 2383, 2832, 2500, 2584, 2408, 2242,
        3443, 4424, 2985, 3445, 2460, 2818, 3218, 3617, 3016, 2421, 3250, 2560, 2792, 2631,
        2838, 2787, 2443, 2223, 2316, 2300, 2309, 2181, 2591, 2100, 2200,
    ],
    "3_bedrooms_leased": [
        1, 1, 2, 4, 0, 7, 0, 4, 0, 0,
        63, 1, 7, 3, 1, 21, 22, 0, 2, 0, 0, 4, 10, 11,
        0, 0, 1, 1, 6, 0, 4, 1, 10, 0, 1,
    ],
    "3_bedrooms_avg_lease_rate": [
        3300, 2900, 2800, 2840, np.nan, 4507, np.nan, 2863, np.nan, np.nan,
        4788, 6050, 4979, 3940, 3300, 3224, 4466, np.nan, 4225, np.nan, np.nan, 2950, 3620, 3776,
        np.nan, np.nan, 2450, 2400, 2542, np.nan, 2289, 2450, 3010, np.nan, 2400,
    ],
}

# Build the frame (pandas raises if the lists are not all the same length).
df_q3 = pd.DataFrame(q3_data_dict)

# Preview the first rows as the cell output.
df_q3.head()


Unnamed: 0,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,2_bedrooms_leased,2_bedrooms_avg_lease_rate,3_bedrooms_leased,3_bedrooms_avg_lease_rate
0,Toronto W01,1,2100.0,36,2276.0,21,3028.0,1,3300.0
1,Toronto W02,2,1875.0,41,2168.0,18,3065.0,1,2900.0
2,Toronto W03,1,1300.0,2,1865.0,0,,2,2800.0
3,Toronto W04,2,1650.0,48,2003.0,26,2442.0,4,2840.0
4,Toronto W05,4,1413.0,19,2083.0,15,2383.0,0,


In [14]:

import pandas as pd
import numpy as np


# --- Q4 quarterly rental table (same column names as the Q1/Q2/Q3 tables) ---
# NOTE(review): presumably 2019 Q4, since df_q4 is later combined with the
# other quarters into the 2019 annual figures - confirm.
#
# Hard-coded table, one entry per district code in "Area". Every list is
# positionally aligned with "Area" and holds 35 values: 10 West codes
# (W01-W10), 14 Central codes (C01-C04 and C06-C15; there is no C05 entry)
# and 11 East codes (E01-E11). The value lists in this cell are written as
# three rows in that West / Central / East order.
# np.nan marks a rate that is not available (e.g. the matching count is 0).
# TODO(review): record which report these figures were transcribed from.
q4_data_dict = {
    "Area": [
        "Toronto W01", "Toronto W02", "Toronto W03", "Toronto W04", "Toronto W05",
        "Toronto W06", "Toronto W07", "Toronto W08", "Toronto W09", "Toronto W10",
        "Toronto C01", "Toronto C02", "Toronto C03", "Toronto C04", "Toronto C06",
        "Toronto C07", "Toronto C08", "Toronto C09", "Toronto C10", "Toronto C11",
        "Toronto C12", "Toronto C13", "Toronto C14", "Toronto C15", "Toronto E01",
        "Toronto E02", "Toronto E03", "Toronto E04", "Toronto E05", "Toronto E06",
        "Toronto E07", "Toronto E08", "Toronto E09", "Toronto E10", "Toronto E11",
    ],
    "Bachelor Leased": [
        1, 2, 0, 1, 2, 3, 0, 0, 0, 0,  # W01-W10
        128, 7, 3, 0, 0, 0, 72, 0, 3, 0, 0, 2, 6, 10,  # C01-C04, C06-C15
        2, 0, 0, 0, 0, 1, 0, 1, 3, 0, 0,  # E01-E11
    ],
    "bachelor_avg_lease_rate": [
        1850, 1700, np.nan, 1800, 1500, 1700, np.nan, np.nan, np.nan, np.nan,
        1909, 1921, 1583, np.nan, np.nan, np.nan, 1913, np.nan, 1850, np.nan, np.nan, 1813, 1852, 1790,
        1775, np.nan, np.nan, np.nan, np.nan, 1325, np.nan, 1600, 1627, np.nan, np.nan,
    ],
    "1_bedrooms_leased": [
        46, 37, 6, 29, 21, 162, 1, 82, 3, 22,
        1262, 86, 28, 12, 22, 95, 518, 17, 142, 11, 5, 30, 217, 269,
        94, 9, 4, 15, 16, 5, 27, 5, 73, 0, 6,
    ],
    "1_bed_room_avg_lease_rate": [
        2266, 2231, 2042, 2003, 2045, 2184, 1850, 2112, 2067, 2022,
        2343, 2504, 2150, 2097, 2051, 2215, 2311, 2835, 2196, 1921, 2550, 2066, 2186, 2108,
        2113, 2177, 2019, 1888, 1875, 1925, 1868, 1829, 1971, np.nan, 1772,
    ],
    "2_bedrooms_leased": [
        14, 12, 1, 19, 14, 128, 3, 59, 6, 6,
        454, 43, 25, 14, 13, 101, 182, 12, 58, 14, 1, 30, 170, 95,
        50, 8, 3, 10, 16, 7, 32, 5, 47, 1, 9,
    ],
    "2_bedrooms_avg_lease_rate": [
        3462, 2866, 2400, 2424, 2309, 2794, 2450, 2736, 2507, 2350,
        3306, 5900, 2910, 3246, 2453, 2759, 3151, 4066, 2982, 2484, 2850, 2478, 2734, 2642,
        2775, 3106, 2420, 2150, 2311, 2529, 2185, 2265, 2345, 2100, 2167,
    ],
    "3_bedrooms_leased": [
        3, 4, 1, 3, 1, 2, 0, 1, 1, 0,
        28, 1, 1, 1, 2, 17, 5, 1, 2, 2, 0, 2, 7, 4,
        2, 1, 0, 1, 5, 0, 1, 1, 6, 3, 0,
    ],
    "3_bedrooms_avg_lease_rate": [
        4917, 3900, 2700, 2517, 2600, 3225, np.nan, 2600, 2200, np.nan,
        4358, 5000, 3600, 2450, 3225, 3174, 4110, 7155, 3900, 2475, np.nan, 2700, 3386, 2813,
        3550, 4950, np.nan, 2400, 2440, np.nan, 2400, 2200, 2653, 2067, np.nan,
    ],
}

# Build the frame (pandas raises if the lists are not all the same length).
df_q4 = pd.DataFrame(q4_data_dict)

# Preview the first rows as the cell output.
df_q4.head()


Unnamed: 0,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,2_bedrooms_leased,2_bedrooms_avg_lease_rate,3_bedrooms_leased,3_bedrooms_avg_lease_rate
0,Toronto W01,1,1850.0,46,2266.0,14,3462,3,4917.0
1,Toronto W02,2,1700.0,37,2231.0,12,2866,4,3900.0
2,Toronto W03,0,,6,2042.0,1,2400,1,2700.0
3,Toronto W04,1,1800.0,29,2003.0,19,2424,3,2517.0
4,Toronto W05,2,1500.0,21,2045.0,14,2309,1,2600.0


In [15]:

import pandas as pd
import numpy as np

# --- Create Annual Report: Sum Leased, Weighted Rate ---
#
# Collapses the four quarterly tables into one row per Area:
#   * each leased-count column is summed over the quarters;
#   * each rate column becomes the average of the quarterly rates weighted by
#     that quarter's leased count.


def _weighted_rate(group, leased_col, rate_col):
    """Return the leased-count-weighted mean of ``rate_col`` for one Area.

    Parameters
    ----------
    group : pandas.DataFrame
        The quarterly rows belonging to a single Area.
    leased_col : str
        Column holding the number of units leased (used as the weight).
    rate_col : str
        Column holding the average lease rate for the quarter.

    Returns
    -------
    float
        ``sum(rate * leased) / sum(leased)`` over the rows whose rate is not
        NaN. When those rows carry no leased volume, the plain mean of the
        available rates is returned instead, which is NaN if every rate is
        missing.
    """
    # Rows without a rate cannot contribute to either the numerator or the
    # denominator of the weighted average.
    rated = group.dropna(subset=[rate_col])
    weight_total = rated[leased_col].sum()

    if weight_total > 0:
        return (rated[leased_col] * rated[rate_col]).sum() / weight_total

    # No volume to weight by: fall back to the simple mean.
    return group[rate_col].mean()


# Combine all quarters. The Area key is normalised *before* grouping so that
# codes differing only by surrounding whitespace land in the same group;
# stripping after the groupby would leave them as separate rows that share
# one Area value.
df_all_quarters = (
    pd.concat([df_q1, df_q2, df_q3, df_q4], ignore_index=True)
    .assign(Area=lambda frame: frame['Area'].str.strip())
)

# Define the unit types and their corresponding columns (Leased Col, Rate Col)
unit_cols = [
    ('Bachelor Leased', 'bachelor_avg_lease_rate'),
    ('1_bedrooms_leased', '1_bed_room_avg_lease_rate'),
    ('2_bedrooms_leased', '2_bedrooms_avg_lease_rate'),
    ('3_bedrooms_leased', '3_bedrooms_avg_lease_rate')
]

grouped = df_all_quarters.groupby('Area')
annual_rows = []

for area, group in grouped:
    row = {'Area': area}

    for leased_col, rate_col in unit_cols:
        # Results are stored under the original column names so the annual
        # frame has the same schema as the quarterly ones.
        row[leased_col] = group[leased_col].sum()
        row[rate_col] = _weighted_rate(group, leased_col, rate_col)

    annual_rows.append(row)

# One record per Area; groupby iterates the groups in sorted key order.
df_annual = pd.DataFrame(annual_rows)

# Display the first few rows
print("Annual DataFrame Created.")
df_annual.head()


Annual DataFrame Created.


Unnamed: 0,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,2_bedrooms_leased,2_bedrooms_avg_lease_rate,3_bedrooms_leased,3_bedrooms_avg_lease_rate
0,Toronto C01,699,1893.905579,6247,2367.888587,2621,3366.355971,196,4836.75
1,Toronto C02,43,1920.44186,368,2545.429348,257,4743.603113,6,5858.5
2,Toronto C03,10,1581.8,194,2104.154639,118,2876.127119,9,4744.777778
3,Toronto C04,3,1692.0,51,2261.960784,55,3153.436364,10,3197.2
4,Toronto C06,0,,68,2004.294118,56,2409.375,4,3262.5


In [16]:
# Attach the annual rental aggregates (df_annual, one row per Area) to the
# neighbourhood table (df_2019).
#
# This is done with an explicit left merge instead of an in-place
# DataFrame.update on Area-indexed frames:
#   * Area is a district code and is not guaranteed to be unique in df_2019,
#     so update() would be aligning on a non-unique index;
#   * validate='many_to_one' fails loudly if df_annual ever contains the same
#     Area twice, rather than picking a value silently;
#   * df_2019 itself is left untouched, so re-running this cell is safe.

# Rental columns supplied by df_annual that exist as placeholders in df_2019.
rental_cols = [
    c for c in df_annual.columns
    if c != 'Area' and c in df_2019.columns
]

df_2019_final = (
    df_2019
    .drop(columns=rental_cols)  # discard the NaN placeholder columns
    # A left join keeps every neighbourhood row in its original order;
    # Areas missing from df_annual end up with NaN rental values.
    .merge(df_annual[['Area', *rental_cols]], on='Area', how='left',
           validate='many_to_one')
    # The placeholders were float64; keep that dtype so the saved CSV keeps
    # the same number formatting (e.g. 1.0) as before.
    .astype({c: 'float64' for c in rental_cols})
)

# Restore the column layout. `final_col_order` is the list defined in the
# cell that builds df_2019; it is reused here rather than repeated so the two
# copies cannot drift apart.
df_2019_final = df_2019_final[final_col_order]

# Area-indexed views, kept under their previous names in case other cells
# refer to them.
df_2019_indexed = df_2019_final.set_index('Area')
df_annual_indexed = df_annual.set_index('Area')

df_2019_final.head(40)


Unnamed: 0,AREA_NAME,CLASSIFICATION,CLASSIFICATION_CODE,geometry_wkt,geometry_type,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,...,THEFTFROMMV_RATE_2019,THEFTOVER_2019,THEFTOVER_RATE_2019,POPULATION_2019,total_stop_count,avg_stop_frequency,max_stop_frequency,total_line_length_meters,transit_line_density,distinct_route_count
0,Keelesdale-Eglinton West,Neighbourhood Improvement Area,NIA,MULTIPOLYGON (((-79.4620964545852 43.685717567...,MultiPolygon,Toronto W03,1.0,1300.0,12.0,1990.166667,...,240.054871,8.0,68.587105,0,30,537.266667,1321,1.018065,1.017866,11
1,Bridle Path-Sunnybrook-York Mills,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3783755029958 43.744291877...,MultiPolygon,Toronto C12,0.0,,27.0,2440.740741,...,251.559677,3.0,30.18716,0,73,265.60274,911,1.247435,1.246204,14
2,Birchcliffe-Cliffside,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.2467146164104 43.698979955...,MultiPolygon,Toronto E06,2.0,1387.5,26.0,1905.153846,...,396.318085,7.0,29.830393,0,101,282.693069,790,1.291023,1.290157,13
3,Clanton Park,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.4369481337577 43.749120943...,MultiPolygon,Toronto C06,0.0,,68.0,2004.294118,...,382.043945,4.0,22.473173,0,63,404.031746,1541,2.894569,2.893222,19
4,North Toronto,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3974366551459 43.706929916...,MultiPolygon,Toronto C04,3.0,1692.0,51.0,2261.960784,...,122.959221,4.0,27.32427,0,9,532.888889,1692,0.085268,0.085265,2
5,York University Heights,Neighbourhood Improvement Area,NIA,MULTIPOLYGON (((-79.4684684284053 43.775193863...,MultiPolygon,Toronto W05,9.0,1450.333333,86.0,1982.337209,...,542.551025,31.0,105.780388,0,211,357.483412,2079,6.302904,6.293581,24
6,High Park-Swansea,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.4477045017347 43.642830257...,MultiPolygon,Toronto W01,6.0,1825.0,153.0,2236.771242,...,236.132401,13.0,53.854759,0,78,265.282051,872,1.850047,1.848941,22
7,Yorkdale-Glen Park,Emerging Neighbourhood,EN,MULTIPOLYGON (((-79.4639574512317 43.700360286...,MultiPolygon,Toronto W04,7.0,1718.0,187.0,1928.561497,...,512.881653,30.0,178.912216,0,131,347.442748,848,3.378362,3.376084,12
8,Mimico-Queensway,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.4869569370828 43.623887593...,MultiPolygon,Toronto W06,8.0,1687.5,804.0,2138.373134,...,379.974335,14.0,71.887032,0,83,263.638554,529,0.964652,0.964063,12
9,South Eglinton-Davisville,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3863510515018 43.697831265...,MultiPolygon,Toronto C10,40.0,1715.05,782.0,2168.360614,...,106.956635,3.0,13.369579,0,23,492.0,1689,0.500042,0.499989,12


In [26]:
# Save the merged 2019 table to the same directory 2020.csv is read from;
# the row index is not written.
# NOTE(review): this places a derived table inside raw_data/ - confirm that
# is intended.
output_csv_2019 = '../data/raw_data/2019.csv'
df_2019_final.to_csv(path_or_buf=output_csv_2019, index=False)