In [7]:
import pandas as pd 

unit_1bed = pd.read_csv("../data/processed/forecast/1_bedroom_flat_forecast.csv")
unit_2bed = pd.read_csv("../data/processed/forecast/2_bedroom_flat_forecast.csv")
unit_3bed = pd.read_csv("../data/processed/forecast/3_bedroom_flat_forecast.csv")
house_2bed = pd.read_csv("../data/processed/forecast/2_bedroom_house_forecast.csv")
house_3bed = pd.read_csv("../data/processed/forecast/3_bedroom_house_forecast.csv")
house_4bed = pd.read_csv("../data/processed/forecast/4_bedroom_house_forecast.csv")

house_4bed.head(1)


Unnamed: 0,Suburb,2000-01,2000-04,2000-07,2000-10,2001-01,2001-04,2001-07,2001-10,2002-01,...,2027-10,2028-01,2028-04,2028-07,2028-10,2029-01,2029-04,2029-07,2029-10,2030-01
0,Albert Park-Middle Park-West St Kilda,500.0,550.0,525.0,513.0,550.0,523.0,510.0,520.0,520.0,...,1513.245816,1522.430548,1531.615279,1540.800011,1549.984742,1559.169473,1568.354205,1577.538936,1586.723668,1595.908399


In [31]:
def cleaning(df, name):
    list = []
    for col in df.columns:
        if col not in ['Suburb'] and col < '2025-03':
            list.append(col)

    df_cleaned = df.drop(columns=list)

    df_cleaned = df_cleaned.rename(columns={'2025-04': f'{name}_Rent_today'})

    return df_cleaned


def growth(df, name):
    # avg of all columns except first two, as you intended
    df[f"{name}_avg_future_rent"] = df.iloc[:, 2:].mean(axis=1)

    drop = []
    for col in df.columns:
        # keep only Suburb, today, and the avg column
        if col not in ['Suburb', f'{name}_Rent_today', f'{name}_avg_future_rent']:
            drop.append(col)

    df_growth = df.drop(columns=drop, errors='ignore')

    # your formula, fixed column names
    df_growth[f'{name}_Price_change (%)'] = (
        (df_growth[f'{name}_avg_future_rent'] - df_growth[f'{name}_Rent_today'])
        / df_growth[f'{name}_avg_future_rent'] * 100
    )

    df_sorted = df_growth.sort_values(by=f'{name}_Price_change (%)', ascending=False)
    df_sorted['Suburb'] = df_sorted['Suburb'].str.upper()

    return df_sorted


Apartments

In [46]:
ap1bed = cleaning(unit_1bed, 'unit_1bed')
apart_1bed = growth(ap1bed, 'unit_1bed')

ap2bed = cleaning(unit_2bed, 'unit_2bed')
apart_2bed = growth(ap2bed, 'unit_2bed')

ap3bed = cleaning(unit_3bed, 'unit_3bed')
apart_3bed = growth(ap3bed, 'unit_3bed')

merged = apart_1bed.merge(apart_2bed, on='Suburb', how='left').merge(apart_3bed, on='Suburb', how='left')


merged['ALL_avg_price_change(%)']=(merged['unit_1bed_Price_change (%)']
                                   +merged['unit_2bed_Price_change (%)']
                                   +merged['unit_3bed_Price_change (%)'])/3

merged['ALL_future_rent']=(merged['unit_1bed_avg_future_rent']
                                   +merged['unit_2bed_avg_future_rent']
                                   +merged['unit_3bed_avg_future_rent'])/3

sorted_apartments = merged.sort_values(by='ALL_avg_price_change(%)', ascending=False)

sorted_apartments.head()



Unnamed: 0,Suburb,unit_1bed_Rent_today,unit_1bed_avg_future_rent,unit_1bed_Price_change (%),unit_2bed_Rent_today,unit_2bed_avg_future_rent,unit_2bed_Price_change (%),unit_3bed_Rent_today,unit_3bed_avg_future_rent,unit_3bed_Price_change (%),ALL_avg_price_change(%),ALL_future_rent
1,SHEPPARTON,286.107764,354.365625,19.261987,377.176166,485.050665,22.239841,493.666283,593.577056,16.83198,19.444603,477.664449
0,FLORA HILL-BENDIGO EAST,361.428985,498.738037,27.531297,375.253095,429.670208,12.664856,443.203802,469.504544,5.601808,15.265987,465.97093
64,MILDURA,275.110827,293.523362,6.272937,350.499146,456.7388,23.260484,456.844737,539.299015,15.289158,14.940859,429.853726
4,SWAN HILL,222.412654,260.739968,14.699439,355.378503,409.163538,13.145119,449.073151,471.911576,4.839556,10.894705,380.605027
78,CROYDON-LILYDALE,351.266015,371.452729,5.434531,504.663412,552.886982,8.722139,624.818144,763.727549,18.188345,10.781672,562.689087


In [54]:
def cleanPrice(name):
    df = pd.read_csv(f"../data/raw/{name}_prices.csv", sep=';')

    df=df.rename(columns={'Q4-2024':'Q4_2024'})

    columns_d=['Q3_2024', 'Q4_2024']

    units = df.rename(columns={'Q1_2025':'unit_price'})

    units = units.drop(columns=columns_d)

    apartments = sorted_apartments.merge(units, on='Suburb', how='left')

    apartments = sorted_apartments.merge(units, on='Suburb', how='left')

    apartments['UNIT Annual ROI (%)'] = apartments['ALL_future_rent'] / apartments['unit_price'] * 100 

    apartments = apartments.sort_values(by='UNIT Annual ROI (%)', ascending=False)

    return apartments 



Return on Investment (ROI) for all types of apartments 

In [55]:

apartments = cleanPrice('apartment')

apartments.head()

Unnamed: 0,Suburb,unit_1bed_Rent_today,unit_1bed_avg_future_rent,unit_1bed_Price_change (%),unit_2bed_Rent_today,unit_2bed_avg_future_rent,unit_2bed_Price_change (%),unit_3bed_Rent_today,unit_3bed_avg_future_rent,unit_3bed_Price_change (%),ALL_avg_price_change(%),ALL_future_rent,unit_price,Num_sales_units,UNIT Annual ROI (%)
60,SOUTH YARRA,500.103639,524.723373,4.691945,707.337936,758.814716,6.78384,1068.24779,1162.642711,8.118997,6.531594,815.3936,575000.0,191.0,0.141808
101,DOCKLANDS,592.114321,618.238675,4.22561,762.706321,820.544703,7.048779,1190.106469,1217.148355,2.221741,4.49871,885.310578,635000.0,117.0,0.139419
56,MALVERN EAST,475.050119,510.82681,7.003683,602.386465,644.219973,6.493668,792.385896,844.557308,6.177368,6.55824,666.534697,486000.0,61.0,0.137147
100,ST KILDA,458.006045,459.76566,0.38272,620.554908,670.861574,7.498815,949.211969,1006.500877,5.691889,4.524474,712.376037,535000.0,159.0,0.133154
7,DANDENONG,365.493382,393.101415,7.023132,450.552404,524.826094,14.152058,603.488639,666.006339,9.386953,10.187381,527.977949,400000.0,118.0,0.131994


HOUSES

In [56]:
h2bed = cleaning(house_2bed, 'house_2bed')
house_2bed = growth(h2bed, 'house_2bed')

h3bed = cleaning(house_3bed, 'house_3bed')
house_3bed = growth(h3bed, 'house_3bed')

h4bed = cleaning(house_4bed, 'house_4bed')
house_4bed = growth(h4bed, 'house_4bed')

merged = house_2bed.merge(house_3bed, on='Suburb', how='left').merge(house_4bed, on='Suburb', how='left')

merged['ALL_avg_price_change(%)']=(merged['house_2bed_Price_change (%)']
                                   +merged['house_3bed_Price_change (%)']
                                   +merged['house_4bed_Price_change (%)'])/3

merged['ALL_future_rent']=(merged['house_2bed_avg_future_rent']
                                   +merged['house_3bed_avg_future_rent']
                                   +merged['house_4bed_avg_future_rent'])/3

sorted_houses = merged.sort_values(by='ALL_avg_price_change(%)', ascending=False)

sorted_houses.head()

Unnamed: 0,Suburb,house_2bed_Rent_today,house_2bed_avg_future_rent,house_2bed_Price_change (%),house_3bed_Rent_today,house_3bed_avg_future_rent,house_3bed_Price_change (%),house_4bed_Rent_today,house_4bed_avg_future_rent,house_4bed_Price_change (%),ALL_avg_price_change(%),ALL_future_rent
3,MILDURA,385.335994,234.991675,-63.978572,472.138107,306.469376,-54.057189,581.745307,373.303013,-55.837292,-57.957685,304.921355
0,SHEPPARTON,392.174433,250.890172,-56.313191,460.256032,270.857353,-69.925618,591.077872,348.707392,-69.505404,-65.248071,290.151639
8,MOE-NEWBOROUGH,345.723032,204.160528,-69.338821,408.836834,239.283444,-70.858805,540.60677,341.020639,-58.526115,-66.241247,261.488204
2,WODONGA,433.343051,267.516442,-61.987445,487.475313,291.026904,-67.501803,589.268022,347.769374,-69.442184,-66.310478,302.10424
101,FERNTREE GULLY,513.394039,274.223752,-87.217203,601.816654,351.269973,-71.32596,731.672448,519.515394,-40.837491,-66.460218,381.669706


In [57]:
houses = cleanPrice('House')

houses.head()

Unnamed: 0,Suburb,unit_1bed_Rent_today,unit_1bed_avg_future_rent,unit_1bed_Price_change (%),unit_2bed_Rent_today,unit_2bed_avg_future_rent,unit_2bed_Price_change (%),unit_3bed_Rent_today,unit_3bed_avg_future_rent,unit_3bed_Price_change (%),ALL_avg_price_change(%),ALL_future_rent,unit_price,Num_sales,UNIT Annual ROI (%)
0,SHEPPARTON,286.107764,354.365625,19.261987,377.176166,485.050665,22.239841,493.666283,593.577056,16.83198,19.444603,477.664449,470500.0,188.0,0.101523
32,BAIRNSDALE,287.358007,308.41465,6.827381,371.321337,439.34408,15.482795,504.677529,510.220223,1.086334,7.798836,419.326317,450000.0,35.0,0.093184
5,HORSHAM,286.848164,353.692839,18.899075,302.376685,322.861553,6.344784,403.61831,432.931999,6.770968,10.671609,369.828797,413100.0,68.0,0.089525
2,MILDURA,275.110827,293.523362,6.272937,350.499146,456.7388,23.260484,456.844737,539.299015,15.289158,14.940859,429.853726,485000.0,165.0,0.08863
53,CORIO,302.2,324.2,6.785935,373.763996,398.164214,6.12818,430.798545,462.739771,6.902633,6.605582,395.034662,480000.0,81.0,0.082299


In [58]:
apartments.to_csv('../data/output/apartments_growth.csv')

houses.to_csv('../data/output/houses_growth.csv')