In [None]:
# 2017 PIPELINE
# cols must match this format 

""" ['AREA_NAME', 'CLASSIFICATION', 'CLASSIFICATION_CODE', 'geometry_wkt',
       'geometry_type', 'Area', 'Bachelor Leased', 'bachelor_avg_lease_rate',
       '1_bedrooms_leased', '1_bed_room_avg_lease_rate', '2_bedrooms_leased',
       '2_bedrooms_avg_lease_rate', '3_bedrooms_leased',
       '3_bedrooms_avg_lease_rate', 'area_sq_meters', 'perimeter_meters',
       'park_count', 'ASSAULT_2024', 'ASSAULT_RATE_2024', 'AUTOTHEFT_2024',
       'AUTOTHEFT_RATE_2024', 'BIKETHEFT_RATE_2024', 'BREAKENTER_2024',
       'BIKETHEFT_RATE_2024_1', 'HOMICIDE_2024', 'HOMICIDE_RATE_2024',
       'ROBBERY_2024', 'ROBBERY_RATE_2024', 'SHOOTING_2024',
       'SHOOTING_RATE_2024', 'THEFTFROMMV_2024', 'THEFTFROMMV_RATE_2024',
       'THEFTOVER_2024', 'THEFTOVER_RATE_2024', 'POPULATION_2024',
       'total_stop_count', 'avg_stop_frequency', 'max_stop_frequency',
       'total_line_length_meters', 'transit_line_density',
       'distinct_route_count'] """

"""
Static cols: 

AREA_NAME'
'CLASSIFICATION'
'CLASSIFICATION_CODE'
'geometry_wkt'
'geometry_type'
'Area'
'area_sq_meters'
'perimeter_meters'
'park_count'
'total_stop_count'
'avg_stop_frequency'
'max_stop_frequency'
'total_line_length_meters'
'transit_line_density'
'distinct_route_count'


2017 specific cols in og dataset

'ASSAULT_2017', 
'ASSAULT_RATE_2017',
 'AUTOTHEFT_2017', '
 AUTOTHEFT_RATE_2017', 
 'BIKETHEFT_RATE_2017', 
 'BREAKENTER_2017', 
 'BIKETHEFT_RATE_2017_1', 
 'HOMICIDE_2017', 
 'HOMICIDE_RATE_2017', 
 'ROBBERY_2017', 
 'ROBBERY_RATE_2017', 
 'SHOOTING_2017',
 'SHOOTING_RATE_2017', 
 'THEFTFROMMV_2017', 
 'THEFTFROMMV_RATE_2017', 
 'THEFTOVER_2017', 
 'THEFTOVER_RATE_2017', 

 'POPULATION_2017' (not in og dataset )


"""

# read from tor_neighborgood_condorental.csv, take all static rows, and year coded rows for 2017. 
# create population row, fill with 0s if not found in og dataset

#  'Bachelor Leased', 'bachelor_avg_lease_rat'1_bedrooms_leased', '1_bed_room_avg_lease_rate', '2_bedrooms_leased', '2_bedrooms_avg_lease_rate', '3_bedrooms_leased','3_bedrooms_avg_lease_rate should be created with null values, I will manually join these after 

In [55]:
import pandas as pd

# Load the source dataset
df = pd.read_csv('tor_neighborhood_condorental.csv')
df_2024 = pd.read_csv('../sets/2024_dataframe.csv')


# Define the target columns for 2017 (update crime columns to 2017)
ordered_cols = [
    'AREA_NAME', 'CLASSIFICATION', 'CLASSIFICATION_CODE', 'geometry_wkt',
    'geometry_type', 'Area', 'Bachelor Leased', 'bachelor_avg_lease_rate',
    '1_bedrooms_leased', '1_bed_room_avg_lease_rate', '2_bedrooms_leased',
    '2_bedrooms_avg_lease_rate', '3_bedrooms_leased',
    '3_bedrooms_avg_lease_rate', 'area_sq_meters', 'perimeter_meters',
    'park_count', 'ASSAULT_2017', 'ASSAULT_RATE_2017', 'AUTOTHEFT_2017',
    'AUTOTHEFT_RATE_2017', 'BIKETHEFT_RATE_2017', 'BREAKENTER_2017',
    'BIKETHEFT_RATE_2017_1', 'HOMICIDE_2017', 'HOMICIDE_RATE_2017',
    'ROBBERY_2017', 'ROBBERY_RATE_2017', 'SHOOTING_2017',
    'SHOOTING_RATE_2017', 'THEFTFROMMV_2017', 'THEFTFROMMV_RATE_2017',
    'THEFTOVER_2017', 'THEFTOVER_RATE_2017', 'POPULATION_2017',
    'total_stop_count', 'avg_stop_frequency', 'max_stop_frequency',
    'total_line_length_meters', 'transit_line_density',
    'distinct_route_count'
]

# Create missing rental columns with nulls
for col in [
    'Bachelor Leased', 'bachelor_avg_lease_rate', '1_bedrooms_leased',
    '1_bed_room_avg_lease_rate', '2_bedrooms_leased', '2_bedrooms_avg_lease_rate',
    '3_bedrooms_leased', '3_bedrooms_avg_lease_rate'
]:
        df[col] = 0
        

# Create population column, fill with 0s if not found
if 'POPULATION_2017' not in df.columns:
    df['POPULATION_2017'] = 0

# Reindex to match the schema
df_2017 = df.reindex(columns=ordered_cols)

df_2017.to_csv('../sets/2017_dataframe.csv', index=False)


In [56]:
df_2017.head()

Unnamed: 0,AREA_NAME,CLASSIFICATION,CLASSIFICATION_CODE,geometry_wkt,geometry_type,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,...,THEFTFROMMV_RATE_2017,THEFTOVER_2017,THEFTOVER_RATE_2017,POPULATION_2017,total_stop_count,avg_stop_frequency,max_stop_frequency,total_line_length_meters,transit_line_density,distinct_route_count
0,Keelesdale-Eglinton West,Neighbourhood Improvement Area,NIA,MULTIPOLYGON (((-79.4620964545852 43.685717567...,MultiPolygon,Toronto W03,0,0,0,0,...,182.672241,4.0,34.794712,0,30,537.266667,1321,1.018065,1.017866,11
1,Bridle Path-Sunnybrook-York Mills,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3783755029958 43.744291877...,MultiPolygon,Toronto C12,0,0,0,0,...,121.876907,5.0,50.782043,0,73,265.60274,911,1.247435,1.246204,14
2,Birchcliffe-Cliffside,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.2467146164104 43.698979955...,MultiPolygon,Toronto E06,0,0,0,0,...,251.28894,5.0,21.66284,0,101,282.693069,790,1.291023,1.290157,13
3,Clanton Park,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.4369481337577 43.749120943...,MultiPolygon,Toronto C06,0,0,0,0,...,399.76825,5.0,28.968714,0,63,404.031746,1541,2.894569,2.893222,19
4,North Toronto,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3974366551459 43.706929916...,MultiPolygon,Toronto C04,0,0,0,0,...,111.39402,2.0,15.913431,0,9,532.888889,1692,0.085268,0.085265,2


In [18]:
cols_2017 = [col for col in df.columns if '2017' in col]
print("\nColumns for 2017:")
print(cols_2017)



Columns for 2017:
['ASSAULT_2017', 'ASSAULT_RATE_2017', 'AUTOTHEFT_2017', 'AUTOTHEFT_RATE_2017', 'BIKETHEFT_RATE_2017', 'BREAKENTER_2017', 'BIKETHEFT_RATE_2017_1', 'HOMICIDE_2017', 'HOMICIDE_RATE_2017', 'ROBBERY_2017', 'ROBBERY_RATE_2017', 'SHOOTING_2017', 'SHOOTING_RATE_2017', 'THEFTFROMMV_2017', 'THEFTFROMMV_RATE_2017', 'THEFTOVER_2017', 'THEFTOVER_RATE_2017', 'POPULATION_2017']


In [46]:
import pandas as pd
# Manually creating dataframe from treb to merge (ai OCR works to an extent, still have to go over everything to fix mistakes)

# Q1 2017 
data_q1 = {
    "Area": [
        "W01", "W02", "W03", "W04", "W05", "W06", "W07", "W08", "W09", "W10",
        "C01", "C02", "C03", "C04", "C06", "C07", "C08", "C09", "C10", "C11",
        "C12", "C13", "C14", "C15", "E01", "E02", "E03", "E04", "E05", "E06",
        "E07", "E08", "E09", "E10", "E11"
    ],
    "Bachelor Leased": [
        0, 2, 0, 1, 0, 2, 0, 1, 0, 0,
        127, 10, 2, 4, 0, 1, 44, 0, 4, 0,
        0, 3, 6, 3, 0, 0, 0, 0, 0, 0,
        0, 0, 5, 0, 0
    ],
    "bachelor_avg_lease_rate": [
        None, 1550, None, 1400, None, 1418, None, 1375, None, None,
        1543, 1530, 1300, 1275, None, 1450, 1480, None, 1425, None,
        None, 1273, 1472, 1393, None, None, None, None, None, None,
        None, None, 1266, None, None
    ],
    "1_bedrooms_leased": [
        28, 36, 1, 77, 26, 203, 3, 69, 0, 8,
        1351, 85, 18, 19, 11, 97, 408, 14, 177, 49,
        6, 60, 2161, 175, 13, 4, 1, 6, 13, 1,
        39, 5, 63, 2, 3
    ],
    "1_bed_room_avg_lease_rate": [
        1878, 1729, 1550, 1499, 1592, 1687, 1700, 1684, None, 1422,
        1947, 2222, 1766, 1888, 1591, 1776, 1931, 2046, 1834, 1472,
        2058, 1614, 1732, 1658, 1960, 1613, 1100, 1450, 1492, 1425,
        1501, 1374, 1553, 1500, 1433
    ],
    "2_bedrooms_leased": [
        12, 24, 0, 22, 8, 123, 3, 50, 6, 6,
        632, 53, 9, 14, 13, 78, 182, 11, 93, 34,
        11, 26, 158, 53, 10, 6, 3, 11, 15, 1,
        29, 4, 38, 0, 8
    ],
    "2_bedrooms_avg_lease_rate": [
        2581, 2358, None, 1889, 1880, 2332, 2613, 2058, 1988, 1698,
        2890, 3957, 2677, 2875, 1894, 2306, 2638, 3322, 2552, 2017,
        3001, 1992, 2300, 2144, 2507, 2567, 1867, 1659, 1864, 1900,
        1688, 1706, 1891, None, 1631
    ],
    "3_bedrooms_leased": [
        1, 1, 0, 3, 1, 4, 0, 3, 0, 0,
        27, 3, 1, 2, 1, 9, 3, 3, 0, 1,
        0, 2, 5, 4, 0, 0, 0, 1, 4, 0,
        2, 0, 1, 0, 0
    ],
    "3_bedrooms_avg_lease_rate": [
        2150, 3300, None, 2350, 1950, 3275, None, 2283, None, None,
        4348, 9223, 2650, 2313, 2500, 2442, 4232, 4617, None, 1650,
        None, 2175, 3474, 2070, None, None, None, 1700, 2215, None,
        1900, None, 1850, None, None
    ]
}

# Q1 2017 
import pandas as pd

# Q2 2017
data_q2 = {
    "Area": [
        "W01", "W02", "W03", "W04", "W05", "W06", "W07", "W08", "W09", "W10",
        "C01", "C02", "C03", "C04", "C06", "C07", "C08", "C09", "C10", "C11",
        "C12", "C13", "C14", "C15", "E01", "E02", "E03", "E04", "E05", "E06",
        "E07", "E08", "E09", "E10", "E11"
    ],
    "Bachelor Leased": [
        0, 3, 0, 1, 1, 5, 0, 0, 0, 0,
        164, 13, 2, 2, 0, 0, 49, 0, 3, 0, 0, 1, 4, 8,
        0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0 
       
    ],
    "bachelor_avg_lease_rate": [
        1814, 1683, 1400, 1596, 1545, 1777, 1700, 1721, 1900, 1571,
        2030, 2060, 1831, 1683, 1674, 1837, 1997, 2138, 1893, 1567, 2108, 1657, 1760, 1692,
        2089, 1763, 1450, 1448, 1601, None, 1521, 1490, 1601, 1300, 1538
    ],
    "1_bedrooms_leased": [
        36, 79, 1, 31, 21, 239, 1, 83, 1, 27,
        1699, 131, 34, 12, 18, 182, 575, 18, 182, 29,
        12, 39, 297, 251, 18, 8, 1, 9, 9, 0,
        25, 5, 79, 1, 2
    ],
    "1_bed_room_avg_lease_rate": [
        1814, 1683, 1400, 1596, 1545, 1777, 1700, 1721, 1900, 1571,
        2030, 2060, 1831, 1683, 1674, 1837, 1997, 2138, 1893, 1567,
        2108, 1657, 1760, 1692, 2089, 1763, 1450, 1448, 1601, None,
        1521, 1490, 1601, 1300, 1538
    ],
    "2_bedrooms_leased": [
        24, 25, 1, 13, 4, 166, 1, 63, 7, 10,
        827, 84, 31, 16, 17, 127, 254, 21, 105, 21, 8, 30, 248, 110, 
        6, 10, 5, 9, 19, 0, 28, 2, 55, 4, 6
    ],
    "2_bedrooms_avg_lease_rate": [
        2499, 2571, 2520, 2017, 1848, 2531, 2050, 2213, 1957, 1750,
        2983, 3749, 2562, 2987, 2045, 2427, 2794, 2746, 2687, 2132,
        2966, 2122, 2347, 2310, 2858, 2655, 1960, 1733, 1940, None,
        1831, 1675, 1989, 1437, 1679
    ],
    "3_bedrooms_leased": [
        1, 1, 0, 2, 1, 2, 0, 2, 0, 1,
        58, 2, 0, 1, 0, 16, 4, 2, 3, 3, 1, 2, 18, 6,
        0, 0, 0, 0, 2, 0, 2, 0, 10, 0, 1
    ],

    "3_bedrooms_avg_lease_rate": [
        3000, 2550, None, 2600, 1400, 2550, None, 2125, None, 575,
        4088, 5275, None, 2250, None, 2757, 4861, 5325, 3667, 2073,
        2900, 2925, 3055, 3063, None, None, None, None, 1950, None,
        1850, None, 2269, None, 1750
    ]
}

import pandas as pd

# Q3 2017 
data_q3 = {
    "Area": [
        "W01", "W02", "W03", "W04", "W05", "W06", "W07", "W08", "W09", "W10",
        "C01", "C02", "C03", "C04", "C06", "C07", "C08", "C09", "C10", "C11",
        "C12", "C13", "C14", "C15", "E01", "E02", "E03", "E04", "E05", "E06",
        "E07", "E08", "E09", "E10", "E11"
    ],
    "Bachelor Leased": [
        2, 1, 0, 0, 0, 3, 0, 0, 0, 0,
        185, 12, 2, 0, 0, 1, 69, 1, 0, 0,
        0, 0, 6, 12, 4, 1, 0, 0, 0, 0,
        0, 0, 6, 0, 0
    ],
    "bachelor_avg_lease_rate": [
        1050, 1500, None, None, None, 1465, None, None, None, None,
        1707, 1694, 1450, None, None, None, 1722, 1600, None, None,
        None, None, 1549, 1500, 1780, 1300, None, None, None, None,
        None, None, 1366, None, None
    ],
    "1_bedrooms_leased": [
        43, 53, 0, 21, 26, 150, 5, 67, 0, 42,
        1717, 144, 23, 23, 12, 152, 528, 19, 79, 23,
        7, 40, 323, 306, 20, 8, 7, 11, 21, 2,
        28, 3, 98, 0, 6
    ],
    "1_bed_room_avg_lease_rate": [
        1977, 1829, None, 1626, 1706, 1881, 2139, 1763, None, 1634,
        2158, 2338, 2034, 1888, 1758, 1935, 2132, 2129, 2044, 1689,
        2169, 1699, 1859, 1827, 2044, 2034, 1787, 1579, 1631, 1725,
        1643, 1597, 1665, None, 1430
    ],
    "2_bedrooms_leased": [
        16, 32, 0, 24, 5, 153, 0, 68, 3, 21,
        870, 67, 18, 10, 12, 155, 284, 29, 50, 19,
        10, 33, 332, 145, 11, 15, 5, 8, 15, 0,
        51, 8, 79, 2, 9
    ],
    "2_bedrooms_avg_lease_rate": [
        2839, 2322, None, 1931, 1765, 2612, None, 2178, 2100, 1876,
        3039, 4289, 2786, 2789, 2165, 2481, 2997, 3138, 2908, 2088,2986, 2196, 2499, 2562, 
        2750, 2893, 2189, 1791, 2043, None, 1894, 1803, 2113, 1500, 1695
    ],
    "3_bedrooms_leased": [
        3, 0, 1, 0, 0, 3, 1, 1, 0, 1,
        67, 3, 1, 0, 1, 21, 6, 2, 1, 2,
        1, 0, 7, 7, 0, 1, 1, 1, 9, 0, 4,
        0, 10, 1, 0
    ],
    "3_bedrooms_avg_lease_rate": [
        2867, None, 2400, None, None, 3200, 1750, 1850, None, 1900,
        4279, 5775, 3850, None, 2750, 2898, 3877, 3425, 3560, 2000, 2600, None, 3143, 2854, 
        None, 1995, 2700, 2000, 2056, None, 2025, None, 2553, 1350, None
    ]
}



import pandas as pd

# Q4 2017 rental data from screenshot, formatted for easy column editing
data_q4 = {
    "Area": [
        "W01", "W02", "W03", "W04", "W05", "W06", "W07", "W08", "W09", "W10",
        "C01", "C02", "C03", "C04", "C06", "C07", "C08", "C09", "C10", "C11",
        "C12", "C13", "C14", "C15", "E01", "E02", "E03", "E04", "E05", "E06",
        "E07", "E08", "E09", "E10", "E11"
    ],

    "Bachelor Leased": [
        0, 0, 0, 0, 2, 2, 0, 0, 0, 0,
        97, 8, 3, 0, 0, 0, 60, 0, 1, 1, 0, 0, 5, 14,
        0, 2, 
        0, 0, 0, 0, 0, 0, 2, 0, 0
       
    ],
    "bachelor_avg_lease_rate": [
        None, None, None, None, 1213, 1675, None, None, None, None, 
        1706, 1806, 1550, None, None, None, 1705, None, 1450, 1495, None, None, 1565, 1512,
        None, 1375, None, None, None, None, None, None, 1425, None, None

    ],
    "1_bedrooms_leased": [
        24, 30, 0, 18, 16, 113, 3, 73, 1, 14, 
        1112, 49, 10, 11, 14, 84, 347, 2, 64, 14, 3, 29, 197, 220,
        22, 8, 6, 8, 9, 3, 20, 4, 62, 1, 2
    ],
    "1_bed_room_avg_lease_rate": [
        1998, 1912, None, 1733, 1660, 1895, 1915, 1821, 1500, 1613,
        2152, 2652, 1988, 1993, 1723, 1994, 2103, 2175, 2106, 1642, 2250, 1806, 1925, 1838,
        2048, 1912, 1463, 1500, 1714, 1608, 1616, 1419, 1677, 950, 1490 
        
    ],

    "2_bedrooms_leased": [
        22, 16, 2, 14, 8, 83, 2, 49, 2, 24, 
        532, 37, 9, 9, 11, 100, 163, 14, 35, 8, 5, 20, 151, 97,
        4, 9, 2, 13, 13, 1, 22, 5, 34, 1, 9 
    ],

    "2_bedrooms_avg_lease_rate": [
        2632, 2471, 2925, 2064, 2081, 2642, 3100, 2252, 2625, 1845,
        3158, 4343, 2864, 2721, 2193, 2568, 3037, 3489, 2743, 2144, 3090, 2276, 2541, 2354,
        2919, 2894, 1988, 1754, 1989, 1850, 1883, 1810, 2036, 1400, 1783
    ],
    "3_bedrooms_leased": [
        0, 0, 1, 2, 3, 2, 0, 1, 2, 0,
        48, 6, 0, 1, 0, 11, 10, 1, 1, 3, 0, 1, 6, 2,
        0, 0, 1, 2, 4, 0, 6, 0, 3, 0, 0
      
    ],
    "3_bedrooms_avg_lease_rate": [
        None, None, 2400, 2125, 2079, 4150, None, 2025, 2098, None, 
        4460, 13925, None, 2250, None, 2895, 3395, 3450, 3000, 2050, None, 2800, 3042, 1975,
        None, None, 2800, 1800, 1838, None, 1818, None, 2383, None, None
    ]
}

for quarter_name, quarter_data in [
    ("Q1", data_q1),
    ("Q2", data_q2),
    ("Q3", data_q3),
    ("Q4", data_q4),
]:
    print(f"{quarter_name}:")
    for name, values in quarter_data.items():
        print(f"{name}: {len(values)}")
        if len(values) != 35:
            print(f"WARNING: {quarter_name} - {name} has {len(values)} values (should be 35)")

Q1:
Area: 35
Bachelor Leased: 35
bachelor_avg_lease_rate: 35
1_bedrooms_leased: 35
1_bed_room_avg_lease_rate: 35
2_bedrooms_leased: 35
2_bedrooms_avg_lease_rate: 35
3_bedrooms_leased: 35
3_bedrooms_avg_lease_rate: 35
Q2:
Area: 35
Bachelor Leased: 35
bachelor_avg_lease_rate: 35
1_bedrooms_leased: 35
1_bed_room_avg_lease_rate: 35
2_bedrooms_leased: 35
2_bedrooms_avg_lease_rate: 35
3_bedrooms_leased: 35
3_bedrooms_avg_lease_rate: 35
Q3:
Area: 35
Bachelor Leased: 35
bachelor_avg_lease_rate: 35
1_bedrooms_leased: 35
1_bed_room_avg_lease_rate: 35
2_bedrooms_leased: 35
2_bedrooms_avg_lease_rate: 35
3_bedrooms_leased: 35
3_bedrooms_avg_lease_rate: 35
Q4:
Area: 35
Bachelor Leased: 35
bachelor_avg_lease_rate: 35
1_bedrooms_leased: 35
1_bed_room_avg_lease_rate: 35
2_bedrooms_leased: 35
2_bedrooms_avg_lease_rate: 35
3_bedrooms_leased: 35
3_bedrooms_avg_lease_rate: 35


In [61]:
# Create averaged rental dataframe from all quarters
quarters = []
for data in [data_q1, data_q2, data_q3, data_q4]:
    quarters.append(pd.DataFrame(data))

rental_avg = pd.concat(quarters).groupby('Area').mean(numeric_only=True).reset_index()


# Show the result
(rental_avg.head())

Unnamed: 0,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,2_bedrooms_leased,2_bedrooms_avg_lease_rate,3_bedrooms_leased,3_bedrooms_avg_lease_rate
0,C01,143.25,1746.5,1469.75,2071.75,715.25,3017.5,50.0,4293.75
1,C02,10.75,1772.5,102.25,2318.0,60.25,4084.5,3.5,8549.5
2,C03,2.25,1532.75,21.25,1904.75,16.75,2722.25,0.5,3250.0
3,C04,1.5,1479.0,16.25,1863.0,12.25,2843.0,1.0,2271.0
4,C06,0.0,1674.0,13.75,1686.5,13.25,2074.25,0.5,2625.0


In [57]:

df_2017.head()

Unnamed: 0,AREA_NAME,CLASSIFICATION,CLASSIFICATION_CODE,geometry_wkt,geometry_type,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,...,THEFTFROMMV_RATE_2017,THEFTOVER_2017,THEFTOVER_RATE_2017,POPULATION_2017,total_stop_count,avg_stop_frequency,max_stop_frequency,total_line_length_meters,transit_line_density,distinct_route_count
0,Keelesdale-Eglinton West,Neighbourhood Improvement Area,NIA,MULTIPOLYGON (((-79.4620964545852 43.685717567...,MultiPolygon,Toronto W03,0,0,0,0,...,182.672241,4.0,34.794712,0,30,537.266667,1321,1.018065,1.017866,11
1,Bridle Path-Sunnybrook-York Mills,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3783755029958 43.744291877...,MultiPolygon,Toronto C12,0,0,0,0,...,121.876907,5.0,50.782043,0,73,265.60274,911,1.247435,1.246204,14
2,Birchcliffe-Cliffside,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.2467146164104 43.698979955...,MultiPolygon,Toronto E06,0,0,0,0,...,251.28894,5.0,21.66284,0,101,282.693069,790,1.291023,1.290157,13
3,Clanton Park,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.4369481337577 43.749120943...,MultiPolygon,Toronto C06,0,0,0,0,...,399.76825,5.0,28.968714,0,63,404.031746,1541,2.894569,2.893222,19
4,North Toronto,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3974366551459 43.706929916...,MultiPolygon,Toronto C04,0,0,0,0,...,111.39402,2.0,15.913431,0,9,532.888889,1692,0.085268,0.085265,2


In [None]:
# List of rental columns to update
rental_cols = [
    'Bachelor Leased',
    'bachelor_avg_lease_rate',
    '1_bedrooms_leased',
    '1_bed_room_avg_lease_rate',
    '2_bedrooms_leased',
    '2_bedrooms_avg_lease_rate',
    '3_bedrooms_leased',
    '3_bedrooms_avg_lease_rate'
]

# Remove any existing 'Toronto ' prefix before adding
rental_avg['Area'] = rental_avg['Area'].str.replace('Toronto ', '', regex=False)
rental_avg['Area'] = 'Toronto ' + rental_avg['Area']
rental_avg_indexed = rental_avg.set_index('Area')

print("df_2017 Areas:", df_2017['Area'].unique())
print("rental_avg Areas:", rental_avg['Area'].unique())

rental_avg.head()

# Update rental columns in df_2017
for col in rental_cols:
    df_2017.loc[df_2017[col] == 0, col] = df_2017['Area'].map(rental_avg_indexed[col])

# Show the result
df_2017.head()

df_2017 Areas: ['Toronto W03' 'Toronto C12' 'Toronto E06' 'Toronto C06' 'Toronto C04'
 'Toronto W05' 'Toronto W01' 'Toronto W04' 'Toronto W06' 'Toronto C10'
 'Toronto W10' 'Toronto C01' 'Toronto W08' 'Toronto C08' 'Toronto E05'
 'Toronto C11' 'Toronto W09' 'Toronto C15' 'Toronto C13' 'Toronto E09'
 'Toronto E03' 'Toronto E02' 'Toronto E08' 'Toronto C07' 'Toronto E10'
 'Toronto E11' 'Toronto E07' 'Toronto W02' 'Toronto C14' 'Toronto C09'
 'Toronto C02' 'Toronto E04' 'Toronto E01' 'Toronto C03' 'Toronto W07']
rental_avg Areas: ['Toronto C01' 'Toronto C02' 'Toronto C03' 'Toronto C04' 'Toronto C06'
 'Toronto C07' 'Toronto C08' 'Toronto C09' 'Toronto C10' 'Toronto C11'
 'Toronto C12' 'Toronto C13' 'Toronto C14' 'Toronto C15' 'Toronto E01'
 'Toronto E02' 'Toronto E03' 'Toronto E04' 'Toronto E05' 'Toronto E06'
 'Toronto E07' 'Toronto E08' 'Toronto E09' 'Toronto E10' 'Toronto E11'
 'Toronto W01' 'Toronto W02' 'Toronto W03' 'Toronto W04' 'Toronto W05'
 'Toronto W06' 'Toronto W07' 'Toronto W08' 

Unnamed: 0,AREA_NAME,CLASSIFICATION,CLASSIFICATION_CODE,geometry_wkt,geometry_type,Area,Bachelor Leased,bachelor_avg_lease_rate,1_bedrooms_leased,1_bed_room_avg_lease_rate,...,THEFTFROMMV_RATE_2017,THEFTOVER_2017,THEFTOVER_RATE_2017,POPULATION_2017,total_stop_count,avg_stop_frequency,max_stop_frequency,total_line_length_meters,transit_line_density,distinct_route_count
0,Keelesdale-Eglinton West,Neighbourhood Improvement Area,NIA,MULTIPOLYGON (((-79.4620964545852 43.685717567...,MultiPolygon,Toronto W03,,,,,...,182.672241,4.0,34.794712,0,30,537.266667,1321,1.018065,1.017866,11
1,Bridle Path-Sunnybrook-York Mills,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3783755029958 43.744291877...,MultiPolygon,Toronto C12,,,,,...,121.876907,5.0,50.782043,0,73,265.60274,911,1.247435,1.246204,14
2,Birchcliffe-Cliffside,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.2467146164104 43.698979955...,MultiPolygon,Toronto E06,,,,,...,251.28894,5.0,21.66284,0,101,282.693069,790,1.291023,1.290157,13
3,Clanton Park,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.4369481337577 43.749120943...,MultiPolygon,Toronto C06,,,,,...,399.76825,5.0,28.968714,0,63,404.031746,1541,2.894569,2.893222,19
4,North Toronto,Not an NIA or Emerging Neighbourhood,,MULTIPOLYGON (((-79.3974366551459 43.706929916...,MultiPolygon,Toronto C04,,,,,...,111.39402,2.0,15.913431,0,9,532.888889,1692,0.085268,0.085265,2
