# Livability and Affordability of Suburbs in Victoria

This notebook ranks Victorian suburbs by livability and by affordability using the enriched real estate dataset. Scores are computed per postcode (used as a proxy for suburb), and the top 20 postcodes for each measure are listed and shown on interactive maps.


In [50]:
# Import libraries
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# Load the enriched suburbs dataset.
# The displayed head of this file shows a stray 'Unnamed: 0' column holding the
# row number (presumably a pandas index saved with the CSV). It carries no
# information, so drop it; errors='ignore' keeps the load working if the file
# is ever re-exported without that column.
df = pd.read_csv('../../data/curated/cleaned_real_estate_data.csv').drop(
    columns='Unnamed: 0', errors='ignore'
)
df.head()

Unnamed: 0,postcode,weekly_rent,days_listed,bedrooms,bathrooms,carspaces,property_type,lat,lon,agency,...,Population-2023,incidents_recorded,rate_per_100000_population,population_est,crime_per_person,crime_index,crime_rank,available_day,available_month,available_year
0,3032,660.0,50.0,2.0,1.0,1.0,House,-37.77705,144.9182,Keyhole Property Investments,...,13408,16262.25,10220.71448,152460.693763,0.102207,1.813199,137.75,10,9,2025
1,3039,500.0,3.0,2.0,1.0,1.0,Apartment / Unit / Flat,-37.766006,144.91553,Simone Bullen,...,17203,10143.0,7696.915003,138420.725117,0.076969,1.365466,183.0,19,9,2025
2,3181,451.0,4938.0,1.0,1.0,1.0,Apartment / Unit / Flat,-37.85314,144.9986,Prime Property Partners Australia,...,20268,19274.333333,12658.5542,141511.437067,0.126586,2.245682,25.333333,22,5,2014
3,3186,1575.0,4839.0,2.0,2.0,1.0,Apartment / Unit / Flat,-37.9044,144.99974,Westprecinct,...,18828,9564.0,7462.65236,138790.0235,0.074627,1.323907,307.5,18,6,2022
4,3550,650.0,4654.0,2.0,2.0,1.0,House,-36.767086,144.28296,Tweed Sutherland First National,...,15236,9525.0,7471.272827,127488.3172,0.074713,1.325436,150.0,21,7,2026


In [6]:
# Show the column labels of the loaded frame (bare last expression, so it is rendered as the cell output).
df.columns

Index(['postcode', 'weekly_rent', 'days_listed', 'bedrooms', 'bathrooms',
       'carspaces', 'property_type', 'lat', 'lon', 'agency',
       'num_metro_bus_stops', 'num_metro_tram_stops', 'num_metro_train_stops',
       'num_regional_bus_stops', 'num_regional_train_stops', 'num_schools_2km',
       'Median_age_persons', 'Median_mortgage_repay_monthly',
       'Median_tot_prsnl_inc_weekly', 'Median_rent_weekly',
       'Median_tot_fam_inc_weekly', 'Average_num_psns_per_bedroom',
       'Median_tot_hhd_inc_weekly', 'Average_household_size',
       'Owner occupied (%)', 'Mortgage (%)', 'Total rented (%)',
       'Other tenure (%)', 'Unemployment', 'post_gradutae (%)',
       'Graduate_diploma_certificate(%)', 'Bachelor (%)',
       'Advanced_&_Diploma (%)', 'Certificate_level (%)', 'Total_persons',
       'Population-2023', 'incidents_recorded', 'rate_per_100000_population',
       'population_est', 'crime_per_person', 'crime_index', 'crime_rank',
       'available_day', 'available_month

In [27]:
# Collapse the listings to one row per postcode.
# Every feature is summarised by its median; count_rows records how many
# listings contribute to each postcode.
median_features = [
    'num_schools_2km',
    'num_metro_train_stops',
    'num_metro_tram_stops',
    'num_metro_bus_stops',
    'crime_index',
    'weekly_rent',
    'lat',
    'lon',
]
named_aggregations = {
    # Only this output is renamed (lower-cased) relative to its source column.
    'median_tot_hhd_inc_weekly': ('Median_tot_hhd_inc_weekly', 'median'),
    **{feature: (feature, 'median') for feature in median_features},
    'count_rows': ('postcode', 'count'),
}
listings_by_postcode = df.groupby('postcode')
agg = listings_by_postcode.agg(**named_aggregations).reset_index()

In [28]:
# Min-max scale the livability inputs so that a higher value always means "better".
scaler = MinMaxScaler()


def norm(series):
    """Return `series` min-max scaled as a flat 1-D array.

    The shared module-level `scaler` is refit on every call, so after a call
    it holds the fit for the most recently scaled series only.
    """
    column = series.values.reshape(-1, 1)  # the scaler expects a 2-D, single-column array
    scaled = scaler.fit_transform(column)
    return scaled.ravel()


# Amenity counts: more is better, so they are scaled as-is.
for target, source in {
    'schools_norm': 'num_schools_2km',
    'train_norm': 'num_metro_train_stops',
    'tram_norm': 'num_metro_tram_stops',
    'bus_norm': 'num_metro_bus_stops',
}.items():
    agg[target] = norm(agg[source])

# Crime is a cost: flip the scaled value so low crime scores high.
agg['crime_norm'] = 1 - norm(agg['crime_index'])

In [29]:
# Composite scores (chosen metrics / weights)
# Livability is a weighted sum of the normalised components; the weights sum to 1.
LIVABILITY_WEIGHTS = {
    'schools_norm': 0.30,
    'train_norm': 0.10,
    'tram_norm': 0.10,
    'bus_norm': 0.10,
    'crime_norm': 0.40,
}
agg['livability_score'] = sum(
    weight * agg[component] for component, weight in LIVABILITY_WEIGHTS.items()
)

# Affordability using median_tot_hhd_inc_weekly:
# rent_to_income = weekly_rent / median_tot_hhd_inc_weekly (lower = more affordable)
# A zero income would turn the ratio into inf, which MinMaxScaler rejects with a
# ValueError, and a negative income would rank as "most affordable". Mask
# non-positive incomes to NaN so those postcodes simply receive no score.
household_income = agg['median_tot_hhd_inc_weekly'].where(
    agg['median_tot_hhd_inc_weekly'] > 0
)
agg['rent_to_income'] = agg['weekly_rent'] / household_income
agg['affordability_score'] = 1 - norm(agg['rent_to_income'])  # higher = more affordable

# Ranks: 1 = best; tied scores share the lowest rank (method='min').
agg['livability_rank'] = agg['livability_score'].rank(ascending=False, method='min')
agg['affordability_rank'] = agg['affordability_score'].rank(ascending=False, method='min')

In [36]:
# Top-20 tables: each list also carries the other score for comparison.
livable_columns = ['postcode', 'livability_score', 'livability_rank', 'affordability_score']
affordable_columns = ['postcode', 'affordability_score', 'affordability_rank', 'livability_score']

by_livability = agg.sort_values('livability_score', ascending=False)
top_livable = by_livability.head(20)[livable_columns]

by_affordability = agg.sort_values('affordability_score', ascending=False)
top_affordable = by_affordability.head(20)[affordable_columns]

# Print each table under its heading, without the frame index.
for heading, table in (
    ("Top 20 most livable (postcode proxy):", top_livable),
    ("\nTop 20 most affordable (postcode proxy):", top_affordable),
):
    print(heading)
    print(table.to_string(index=False))

Top 20 most livable (postcode proxy):
 postcode  livability_score  livability_rank  affordability_score
     3126          0.733811              1.0             0.745382
     3123          0.731158              2.0             0.792613
     3183          0.717356              3.0             0.765768
     3162          0.712354              4.0             0.759937
     3068          0.688550              5.0             0.732063
     3185          0.673369              6.0             0.777010
     3122          0.666856              7.0             0.767968
     3144          0.656467              8.0             0.732655
     3204          0.655334              9.0             0.684032
     3103          0.654143             10.0             0.680877
     3189          0.654015             11.0             0.724151
     3142          0.653293             12.0             0.748000
     3124          0.652378             13.0             0.771147
     3146          0.651324           

In [46]:
import geopandas as gpd
import folium
from matplotlib import cm, colormaps, colors

# Load the boundary shapefile.
# NOTE(review): the file name indicates SA2 2021 boundaries for Australia, not
# postcode boundaries, and `postcode_gdf` is not referenced by the plotting cells
# in this section — confirm whether it is still needed.
postcode_gdf = gpd.read_file('../../data/landing/boundaries/SA2_2021_AUST_GDA2020.shp')

# Center on Victoria (approximate center)
vic_center = [-37.0, 144.5]
m_livability = folium.Map(location=vic_center, zoom_start=7, tiles='cartodbpositron')
m_affordability = folium.Map(location=vic_center, zoom_start=7, tiles='cartodbpositron')

# Select top 20 only; reset the index so row 0 is the best-scoring postcode.
top_n = 20
top_livable = agg.nlargest(top_n, 'livability_score').reset_index(drop=True)
top_affordable = agg.nlargest(top_n, 'affordability_score').reset_index(drop=True)

# Colormaps and normalization (use top-20 range so colours are meaningful).
# Use the `matplotlib.colormaps` registry: `cm.get_cmap` was deprecated in
# Matplotlib 3.7 and emits a warning on every call.
cmap_liv = colormaps['YlGn']
cmap_aff = colormaps['YlOrRd']

liv_min, liv_max = top_livable['livability_score'].min(), top_livable['livability_score'].max()
aff_min, aff_max = top_affordable['affordability_score'].min(), top_affordable['affordability_score'].max()


The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.


The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.



In [48]:
# Plot top 20 livable
# itertuples keeps each column's own dtype, so the integer postcode renders as
# "3126" in the tooltip; iterrows would upcast the all-numeric row to float and
# show "3126.0".
for position, row in enumerate(top_livable.itertuples(index=False)):
    # A marker cannot be placed without coordinates.
    if pd.isna(row.lat) or pd.isna(row.lon):
        continue
    # Rescale the score within the top-20 range; use the mid colour if all scores tie.
    val = 0.5
    if liv_max > liv_min:
        val = (row.livability_score - liv_min) / (liv_max - liv_min)
    hexcol = colors.to_hex(cmap_liv(val))
    radius = 7
    # highlight the best (first row from nlargest)
    if position == 0:
        radius = 12
        hexcol = '#0000FF'  # blue for best
    # Show 'N/A' when the rank column is absent or the rank is missing.
    rank = getattr(row, 'livability_rank', None)
    rank_label = int(rank) if pd.notna(rank) else 'N/A'
    folium.CircleMarker(
        location=[row.lat, row.lon],
        radius=radius,
        fill=True,
        fill_opacity=0.9,
        color=hexcol,
        fill_color=hexcol,
        tooltip=f"Postcode: {row.postcode}<br>Livability: {row.livability_score:.3f}<br>Rank: {rank_label}"
    ).add_to(m_livability)

display(m_livability)


In [49]:
# Plot top 20 affordable
# itertuples keeps each column's own dtype, so the integer postcode renders as
# "3126" in the tooltip; iterrows would upcast the all-numeric row to float and
# show "3126.0".
for position, row in enumerate(top_affordable.itertuples(index=False)):
    # A marker cannot be placed without coordinates.
    if pd.isna(row.lat) or pd.isna(row.lon):
        continue
    # Rescale the score within the top-20 range; use the mid colour if all scores tie.
    val = 0.5
    if aff_max > aff_min:
        val = (row.affordability_score - aff_min) / (aff_max - aff_min)
    hexcol = colors.to_hex(cmap_aff(val))
    radius = 7
    # highlight the best (first row from nlargest)
    if position == 0:
        radius = 12
        hexcol = '#800080'  # purple for best affordable
    # Show 'N/A' when the rank column is absent or the rank is missing.
    rank = getattr(row, 'affordability_rank', None)
    rank_label = int(rank) if pd.notna(rank) else 'N/A'
    folium.CircleMarker(
        location=[row.lat, row.lon],
        radius=radius,
        fill=True,
        fill_opacity=0.9,
        color=hexcol,
        fill_color=hexcol,
        tooltip=f"Postcode: {row.postcode}<br>Affordability: {row.affordability_score:.3f}<br>Rank: {rank_label}"
    ).add_to(m_affordability)

display(m_affordability)
