# Autoregressive model - baseline

In [61]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg



In [314]:

def forecast_rent_price(df, house_type):
    """Forecast 20 quarters of rent per suburb with a lag-1 autoregressive model.

    Args:
        df: Wide table with a ``suburb`` column and one column per quarter,
            labelled like ``2022_Q1``, holding the rent for that quarter.
        house_type: Property-type label; only used to name the output CSV.

    Returns:
        DataFrame with a ``suburb`` column followed by 20 forecast columns,
        one row per suburb name that has at least 4 observed rents. Suburbs
        with fewer observations are left out. The same table is also written
        to ``../data/curated/rent_growth/{house_type}_forecast.csv``.
    """
    # Reshape wide -> long: one row per (suburb, quarter)
    df_long = df.melt(id_vars="suburb", var_name="quarter", value_name="rent")

    # Convert quarter labels like '2022_Q1' -> pandas.Period
    df_long["quarter"] = df_long["quarter"].str.replace("_", "")
    df_long["quarter"] = pd.PeriodIndex(df_long["quarter"], freq="Q")

    # Column labels for the 20 forecast steps.
    # NOTE(review): "2025Q4" is absent, so these are not 20 consecutive
    # quarters and the labels may not line up with the forecast steps. They
    # are kept unchanged because calc_max_growth reads "2030Q3" by name;
    # confirm the intended horizon against the last quarter in the input.
    quarters = ["2025Q3", "2026Q1", "2026Q2", "2026Q3", "2026Q4",
                "2027Q1", "2027Q2", "2027Q3", "2027Q4",
                "2028Q1", "2028Q2", "2028Q3", "2028Q4",
                "2029Q1", "2029Q2", "2029Q3", "2029Q4",
                "2030Q1", "2030Q2", "2030Q3"]
    horizon = len(quarters)

    # Collect rows in a list and build the frame once at the end; growing a
    # DataFrame with .loc[len(df)] copies the whole frame on every insert.
    rows = []

    # groupby visits every suburb name exactly once (in order of first
    # appearance), so a name repeated in `df` no longer yields duplicate rows
    # and df_long is not re-scanned for each suburb.
    for suburb, ts in df_long.groupby("suburb", sort=False):
        # Set time index
        ts = ts.set_index("quarter").sort_index()

        # Require at least 4 observed values prior to imputation
        if ts["rent"].notna().sum() < 4:
            continue

        # Impute gaps so the model sees a continuous time series
        rent_imp = ts["rent"].interpolate().ffill().bfill()

        # Fit AR model (using lag=1 for simplicity)
        model = AutoReg(rent_imp, lags=1).fit()

        # Forecast the next 5 years => 20 quarters past the end of the data
        forecast = model.predict(start=len(ts), end=len(ts) + horizon - 1)

        # Optional diagnostic plot (disabled):
        #plt.figure(figsize=(10,5))
        #plt.plot(ts.index.to_timestamp(), ts['rent'], marker='o', label='Historical')
        #plt.plot(forecast.index, forecast.values, marker='x', linestyle='--', label='Forecast')
        #plt.xlabel('Quarter')
        #plt.ylabel('Rent')
        #plt.title(f'Rent Forecast for {house_type} in {suburb}')
        #plt.xticks(rotation=45)
        #plt.grid(True)
        #plt.legend()
        #plt.savefig(f"../data/curated/rent_growth/forecast_{suburb}_{house_type}.jpg")
        #plt.show()

        rows.append([suburb] + forecast.tolist())

    all_forecasts = pd.DataFrame(rows, columns=["suburb"] + quarters)
    all_forecasts.to_csv(f"../data/curated/rent_growth/{house_type}_forecast.csv")
    return all_forecasts

# Calculate the maximum growth suburb for each property type

In [342]:
def calc_max_growth(df, housetype, forecast_df):
    """
    Find the suburb with the largest forecast rent increase.

    `df` and `forecast_df` are inner-joined on "suburb"; growth is the
    "2030Q3" forecast column minus the "2025_Q2" column. `housetype` is
    accepted but not used.

    Returns a `(max_growth, max_growth_suburb)` tuple, or None when no
    suburb has a non-missing growth value.
    """
    joined = df.merge(forecast_df, on="suburb", how="inner")

    # Furthest forecast minus the current median.
    # CURRENTLY USING Q2 SINCE CASING ISSUE
    growth = joined["2030Q3"] - joined["2025_Q2"]
    valid_growth = growth.dropna()

    # Nothing to rank: no overlap between the tables, or every growth is missing
    if valid_growth.empty:
        return None

    top_row = valid_growth.idxmax()
    return max(valid_growth), joined.loc[top_row, "suburb"]
    

# call functions

In [346]:
# Load each property type's rent table, forecast it, and record the suburb
# with the largest forecast growth for that type.
data_path = "../data/curated/rent_growth/"
property_types = [
    "1 bedroom flat", "1 bedroom house", "1 bedroom other",
    "2 bedroom flat", "2 bedroom house", "2 bedroom other",
    "3 bedroom flat", "3 bedroom house", "3 bedroom other",
    "4 bedroom flat", "4 bedroom house", "4 bedroom other",
    "5 bedroom flat", "5 bedroom house",
    "6 bedroom house", "6 bedroom other",
    "7 bedroom house",
    "8 bedroom house",
    "9 bedroom house",
]

# One column per property type; calc_max_growth returns either a
# (growth, suburb) tuple or None.
max_growths = pd.DataFrame()
for house_type in property_types:
    df = pd.read_csv(f"{data_path}{house_type}.csv")
    forecast_df = forecast_rent_price(df, house_type)

    # Skip property types for which no suburb produced a forecast
    if len(forecast_df) == 0:
        continue

    max_growths[house_type] = calc_max_growth(df, house_type, forecast_df)

print(max_growths)

   1 bedroom flat 2 bedroom flat 2 bedroom house 3 bedroom flat  \
0     1188.780574     685.548132      191.110542     542.631688   
1  PORT MELBOURNE      GLEN IRIS    MOONEE PONDS       ARMADALE   

  3 bedroom house 4 bedroom house 5 bedroom house  
0     2470.423874     4526.752018            None  
1    GEELONG WEST        HASTINGS            None  
