# Autoregressive model - baseline

In [61]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg



In [210]:

def forecast_rent_price(df: pd.DataFrame, house_type: str) -> None:
    """Forecast 12 quarters of rent per suburb with an AR(1) model and save them.

    The wide input frame is melted to long form, each suburb's series is
    gap-filled and fitted with ``AutoReg(lags=1)``, and the forecasts are
    printed and written to
    ``../data/curated/rent_growth/{house_type}_forecast.csv``.

    Args:
        df: Wide frame with a ``suburb`` column; every other column is
            treated as a quarter label such as ``'2022_Q1'`` holding the rent
            for that quarter.
        house_type: Property-type label, used only to name the output CSV.

    Returns:
        None. The result is printed and written to disk.
    """
    # Reshape wide → long: one row per (suburb, quarter) pair
    df_long = df.melt(id_vars="suburb", var_name="quarter", value_name="rent")

    # Convert quarter labels like '2022_Q1' → pandas.Period
    df_long["quarter"] = df_long["quarter"].str.replace("_", "", regex=False)
    df_long["quarter"] = pd.PeriodIndex(df_long["quarter"], freq="Q")

    # Column labels of the output frame, one per forecast step
    quarters = ["2026Q1", "2026Q2", "2026Q3", "2026Q4",
                "2027Q1", "2027Q2", "2027Q3", "2027Q4",
                "2028Q1", "2028Q2", "2028Q3", "2028Q4"]

    # Visit each suburb exactly once. Looping over df_long["suburb"] directly
    # would revisit a suburb once per quarter column and refit an identical
    # model every time. sort=False keeps the order of first appearance.
    rows = []
    for suburb, group in df_long.groupby("suburb", sort=False):
        rent = group.set_index("quarter")["rent"].sort_index()

        # Need at least 4 observed values *before* imputation to fit a model
        if rent.notna().sum() < 4:
            continue

        # Impute so the model sees a continuous time series: interpolate
        # interior gaps, then pad any leading/trailing gaps
        rent_imp = rent.interpolate().ffill().bfill()

        # Fit AR model (using lag=1 for simplicity)
        model = AutoReg(rent_imp, lags=1).fit()

        # Forecast 12 quarters.
        # NOTE(review): positions are zero-indexed, so the first out-of-sample
        # quarter is len(rent); starting at len(rent) + 1 skips it. That only
        # matches the '2026Q1'.. labels above if the data ends at 2025Q3 —
        # confirm against the input files.
        forecast = model.predict(start=len(rent) + 1, end=len(rent) + 12)

        rows.append([suburb] + forecast.tolist())

    all_forecasts = pd.DataFrame(rows, columns=["suburb"] + quarters)
    print(all_forecasts)
    all_forecasts.to_csv(f"../data/curated/rent_growth/{house_type}_forecast.csv")

In [212]:
# Folder holding one "<property type>.csv" input file per property type
data_path = "../data/curated/rent_growth/"

# Property types to forecast; each name must match a CSV file in data_path
property_types = [
    "1 bedroom flat",
    "1 bedroom house",
    "1 bedroom other",
    "2 bedroom flat",
    "2 bedroom house",
    "2 bedroom other",
    "3 bedroom flat",
    "3 bedroom house",
    "3 bedroom other",
    "4 bedroom flat",
    "4 bedroom house",
    "4 bedroom other",
    "5 bedroom flat",
    "5 bedroom house",
    "6 bedroom house",
    "6 bedroom other",
    "7 bedroom house",
    "8 bedroom house",
    "9 bedroom house",
]

# Read each property type's rent table and run the forecast on it
for house_type in property_types:
    df = pd.read_csv(data_path + house_type + ".csv")
    forecast_rent_price(df, house_type)

              suburb      2026Q1      2026Q2       2026Q3       2026Q4  \
0         ABBOTSFORD  524.425400  528.414623   531.987947   535.188733   
1           ARMADALE  534.417743  553.256781   573.273645   594.541974   
2   BALLARAT CENTRAL  315.182999  314.255662   313.749841   313.473939   
3     BRUNSWICK EAST  535.079123  556.980327   581.258704   608.172275   
4           CARNEGIE  391.824209  393.973092   395.105016   395.701256   
5            CLAYTON  545.718130  558.382150   570.917894   583.326662   
6        COLLINGWOOD  551.801842  552.534968   553.174960   553.733649   
7          DANDENONG  378.344359  377.785033   377.352007   377.016761   
8     EAST MELBOURNE  590.188292  600.064733   609.798823   619.392614   
9            FITZROY  554.283055  557.369171   560.268064   562.991090   
10         FOOTSCRAY  468.353921  471.650770   474.472194   476.886751   
11         FRANKSTON  362.313048  359.447769   357.724634   356.688366   
12         GLEN IRIS  455.639100  459.