In [4]:
import geopandas as gpd
import pandas as pd

# Load the neighbourhood boundary polygons and the 2021 neighbourhood profile workbook.
geo_df = gpd.read_file("../data/raw/neighbourhood-boundaries.geojson")
raw_df = pd.read_excel("../data/raw/neighbourhood-profiles-2021-158-model.xlsx")

# Transpose so the workbook's columns become rows. After reset_index() the former
# column labels sit in the first column (named 'Neighbourhood' below) and row 0
# holds the values that are promoted to column names.
df = raw_df.transpose().reset_index()
df.columns = ['Neighbourhood'] + list(df.iloc[0, 1:])
# Drop the promoted header row; .copy() so the assignment below writes to an
# independent frame rather than to a slice of the transposed one.
df = df[1:].copy()

print(df.columns.tolist())

# data cleaning - removing previous formatting
df['Neighbourhood'] = df['Neighbourhood'].str.strip().str.title()
# Remove any parenthesised number (and the whitespace before it) from the boundary names.
geo_df['AREA_NAME'] = geo_df['AREA_NAME'].str.replace(r'\s*\(\d+\)', '', regex = True)

# Join on a case-insensitive, whitespace-trimmed key built on BOTH sides.
# Title-casing only the profile names (as above) lowercases interior capitals,
# e.g. "CityPlace" -> "Cityplace", so comparing them directly against the
# untouched AREA_NAME values would silently leave such rows unmatched in the
# left merge. The temporary key is dropped again, so merged_data keeps the same
# columns as a plain AREA_NAME/Neighbourhood merge.
geo_keys = geo_df['AREA_NAME'].str.strip().str.lower()
profile_keys = df['Neighbourhood'].str.lower()

merged_data = (
    geo_df.assign(_join_key = geo_keys)
    .merge(df.assign(_join_key = profile_keys), on = '_join_key', how = 'left')
    .drop(columns = '_join_key')
)

['Neighbourhood', 'Neighbourhood Number', 'TSNS 2020 Designation', 'Total - Age groups of the population - 25% sample data', '  0 to 14 years - total', '    0 to 4 years', '    5 to 9 years', '    10 to 14 years', '  15 to 64 years - total', '    15 to 19 years', '    20 to 24 years', '    25 to 29 years', '    30 to 34 years', '    35 to 39 years', '    40 to 44 years', '    45 to 49 years', '    50 to 54 years', '    55 to 59 years', '    60 to 64 years', '  65 years and over - total', '    65 to 69 years', '    70 to 74 years', '    75 to 79 years', '    80 to 84 years', '    85 years and over', '      85 to 89 years', '      90 to 94 years', '      95 to 99 years', '      100 years and over', 'Total - Distribution (%) of the population by broad age groups - 25% sample data', '  0 to 14 years - %', '  15 to 64 years - %', '  65 years and over - %', '    85 years and over - %', 'Average age of the population', 'Median age of the population', 'Total - Persons in private households - 2

In [8]:
import os

# Source columns from merged_data used for the vulnerability indicators, mapped to
# the shorter names used downstream. Defined once so the tabular frame and the
# geographic frame below cannot drift apart.
column_renames = {
    "  65 years and over - total": "65+ Population",
    "  65 years and over - %": "Elderly Distribution",
    "    Living alone": "Population living alone",
    "Prevalence of low income based on the Low-income cut-offs, after tax (LICO-AT) (%)": "Low-Income Households (%)",
    "Total - Age groups of the population - 25% sample data": "Total Population",
}

# --- Tabular (non-geometry) view -------------------------------------------
# rename() returns a new frame, so merged_data itself is not modified.
vulnerability_data = merged_data[["Neighbourhood", *column_renames]].rename(columns=column_renames)

# Values that cannot be parsed as numbers become NaN instead of raising.
for count_column in ("Population living alone", "Total Population"):
    vulnerability_data[count_column] = pd.to_numeric(vulnerability_data[count_column], errors="coerce")

# Calculate % living alone
vulnerability_data["% Living Alone"] = 100 * (
    vulnerability_data["Population living alone"] / vulnerability_data["Total Population"]
)

# Drop columns that contain no data at all.
cleaned_vulnerability_data = vulnerability_data.dropna(axis = 1, how = 'all')

# --- Geographic view (same indicators plus geometry) -------------------------
subset = merged_data[["Neighbourhood", "geometry", *column_renames]].rename(columns=column_renames)

# Only the derived percentage is computed from coerced numbers; the renamed
# source columns themselves are written out as they came from merged_data.
subset["Population Living Alone (%)"] = 100 * (
    pd.to_numeric(subset["Population living alone"], errors = "coerce")
    / pd.to_numeric(subset["Total Population"], errors = "coerce")
)

# Make sure the target directory exists before writing, so the export also
# works on a fresh checkout where ../data/processed has not been created yet.
output_path = "../data/processed/neighbourhood_stats/vulnerability_data.geojson"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
subset.to_file(output_path, driver = "GeoJSON")

In [11]:
# Reload the vulnerability layer written earlier and the per-neighbourhood NDVI/LST statistics.
vulnerability_gdf = gpd.read_file("../data/processed/neighbourhood_stats/vulnerability_data.geojson")
ndvi_lst_df = pd.read_csv("../data/processed/neighbourhood_stats/ndvi_lst_neighbourhood_stats_2024.csv")

# Bring the name column of each table into the same form (trimmed, title case)
# so the two spellings of the key line up in the join.
for frame, name_column in (
    (vulnerability_gdf, "Neighbourhood"),
    (ndvi_lst_df, "neighborhood"),
):
    frame[name_column] = frame[name_column].str.strip().str.title()

# Left join: every vulnerability row is kept; rows without matching statistics get NaN.
merged_gdf = vulnerability_gdf.merge(
    ndvi_lst_df,
    how="left",
    left_on="Neighbourhood",
    right_on="neighborhood",
)

merged_gdf.to_file("../data/processed/complete_heat_risk_inputs.geojson", driver="GeoJSON")