In [36]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pmdarima as pm

In [41]:
# Load the raw rent table and reshape it to one row per suburb, one column per quarter.
historical_data = (
    pd.read_csv('../../data/raw/historical_rent_by_suburb.csv')
    .drop(columns=['Unnamed: 1'])
    .rename(columns={'Unnamed: 0': 'Suburb'})
    .set_index('Suburb')
    # Keep only the columns whose label contains a "." and then strip the
    # suffix, so each remaining column is labelled by its quarter alone.
    .filter(like=".", axis="columns")
    .rename(columns=lambda label: label.split(".")[0])
    # The first data row is discarded before the values are parsed.
    .iloc[1:]
    .reset_index()
    # Remove dollar signs everywhere so the values can be parsed as numbers.
    .replace(r'\$', '', regex=True)
    # Drop every row that has a bare '-' in any cell, then the 'Group Total' row.
    .loc[lambda frame: ~frame.isin(['-']).any(axis=1)]
    .loc[lambda frame: frame['Suburb'] != 'Group Total']
)

# Parse every quarter column as a number; anything unparseable becomes NaN.
historical_data = historical_data.assign(**{
    label: pd.to_numeric(historical_data[label], errors='coerce')
    for label in historical_data.columns
    if label != 'Suburb'
})

# Append one extra row holding the column-wise mean, labelled 'Average'.
mean_rent = historical_data.drop(columns=['Suburb']).mean(numeric_only=True)
mean_rent['Suburb'] = 'Average'
historical_data = pd.concat(
    [historical_data, pd.DataFrame([mean_rent])],
    ignore_index=True,
)

historical_data

Unnamed: 0,Suburb,Mar-00,Jun-00,Sep-00,Dec-00,Mar-01,Jun-01,Sep-01,Dec-01,Mar-02,...,Dec-22,Mar-23,Jun-23,Sep-23,Dec-23,Mar-24,Jun-24,Sep-24,Dec-24,Mar-25
0,Albert Park-Middle Park-West St Kilda,260.000000,260.000000,270.000000,275.000000,275.000000,280.000000,280.000000,290.000000,300.000000,...,525.000000,545.000000,555.000000,600.000000,600.000000,660.000000,675.000000,693.000000,700.000000,700.000000
1,Armadale,200.000000,200.000000,205.000000,210.000000,215.000000,220.000000,225.000000,230.000000,233.000000,...,460.000000,490.000000,500.000000,525.000000,560.000000,560.000000,590.000000,600.000000,600.000000,625.000000
2,Carlton North,260.000000,260.000000,265.000000,270.000000,270.000000,275.000000,280.000000,280.000000,290.000000,...,600.000000,620.000000,630.000000,650.000000,670.000000,680.000000,690.000000,680.000000,700.000000,720.000000
3,Carlton-Parkville,251.000000,260.000000,260.000000,260.000000,260.000000,260.000000,260.000000,270.000000,270.000000,...,450.000000,500.000000,530.000000,550.000000,550.000000,570.000000,580.000000,585.000000,600.000000,600.000000
4,CBD-St Kilda Rd,320.000000,320.000000,320.000000,320.000000,320.000000,320.000000,320.000000,320.000000,320.000000,...,480.000000,550.000000,580.000000,600.000000,620.000000,645.000000,650.000000,650.000000,650.000000,650.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,Wanagaratta,125.000000,125.000000,130.000000,130.000000,130.000000,130.000000,130.000000,135.000000,135.000000,...,380.000000,380.000000,390.000000,390.000000,395.000000,400.000000,400.000000,420.000000,420.000000,411.000000
142,Warragul,130.000000,135.000000,135.000000,135.000000,135.000000,135.000000,140.000000,140.000000,150.000000,...,430.000000,440.000000,450.000000,450.000000,460.000000,473.000000,480.000000,490.000000,500.000000,520.000000
143,Warrnambool,130.000000,135.000000,135.000000,135.000000,140.000000,140.000000,140.000000,145.000000,145.000000,...,420.000000,420.000000,430.000000,450.000000,460.000000,460.000000,475.000000,480.000000,480.000000,500.000000
144,Wodonga,145.000000,145.000000,150.000000,150.000000,150.000000,150.000000,150.000000,150.000000,150.000000,...,410.000000,410.000000,420.000000,420.000000,430.000000,440.000000,450.000000,450.000000,460.000000,475.000000


In [38]:
def forecast_suburb(data):
    """Placeholder for a per-suburb forecasting routine.

    The argument is currently ignored and the function always returns None.
    TODO: implement, or remove if the forecasting stays inline in the notebook.
    """
    return None

In [39]:
FORECAST_HORIZON = 20   # quarters to predict ahead (5 years)
SEASONAL_PERIOD = 4     # quarterly data: 4 observations per seasonal cycle
FORECAST_PLOT_DIR = Path("../../Graphs/time_series_each_suburb")


def _quarterly_series(frame, suburb):
    """Return the row for `suburb` as a float Series indexed by quarterly Periods.

    `frame` must have a 'Suburb' column; every other column label is parsed
    with the '%b-%y' format (e.g. 'Mar-00') and converted to a quarter.
    """
    suburb_data = frame[frame['Suburb'] == suburb].drop(columns=['Suburb']).T
    suburb_data.index = pd.to_datetime(suburb_data.index, format='%b-%y').to_period("Q")
    suburb_data.columns = [suburb]
    return suburb_data[suburb].astype(float)


def _fit_and_forecast(ts, n_periods=FORECAST_HORIZON):
    """Fit a seasonal auto-ARIMA on `ts` and forecast `n_periods` quarters ahead.

    Returns a `(model, forecast_series)` tuple, where `forecast_series` is
    indexed by the quarters immediately following the last observation.
    """
    model = pm.auto_arima(
        ts,
        seasonal=True,
        m=SEASONAL_PERIOD,
        trace=True,                # prints the stepwise search progress
        error_action='ignore',
        suppress_warnings=True,
    )
    forecast_index = pd.period_range(ts.index[-1] + 1, periods=n_periods, freq='Q')
    # Depending on the pmdarima version, predict() may return a pandas Series
    # that carries its own index. Passing such a Series to
    # pd.Series(..., index=...) re-aligns by label and yields NaN wherever the
    # labels differ, so take the raw values and attach them by position.
    forecast_values = np.asarray(model.predict(n_periods=n_periods), dtype=float)
    return model, pd.Series(forecast_values, index=forecast_index)


def _save_forecast_plot(ts, forecast_series, suburb, out_dir=FORECAST_PLOT_DIR):
    """Plot history plus forecast for one suburb and save it as a PNG in `out_dir`."""
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(ts.index.to_timestamp(), ts, label='Historical')
    ax.plot(forecast_series.index.to_timestamp(), forecast_series,
            label='Forecast', linestyle='--')
    ax.set_title(f"{suburb} Forecast for Next 5 Years")
    ax.set_xlabel("Date")
    ax.set_ylabel("Value")
    ax.legend()
    ax.grid(True)

    # savefig does not create missing directories; make sure the target exists
    # so a long run is not lost to a FileNotFoundError.
    out_dir.mkdir(parents=True, exist_ok=True)
    fig.savefig(out_dir / f"{suburb}_forecast.png")
    plt.close(fig)  # close to avoid displaying and to free the figure's memory


results = {}     # suburb -> fitted model
forecasts = {}   # suburb -> forecast Series

for suburb in historical_data['Suburb'].unique():
    print(f"Processing suburb: {suburb}")

    ts = _quarterly_series(historical_data, suburb)
    model, forecast_series = _fit_and_forecast(ts)

    results[suburb] = model
    forecasts[suburb] = forecast_series

    _save_forecast_plot(ts, forecast_series, suburb)

Processing suburb: Albert Park-Middle Park-West St Kilda
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,0,1)[4] intercept   : AIC=780.260, Time=0.39 sec
 ARIMA(0,1,0)(0,0,0)[4] intercept   : AIC=797.555, Time=0.02 sec
 ARIMA(1,1,0)(1,0,0)[4] intercept   : AIC=800.319, Time=0.09 sec
 ARIMA(0,1,1)(0,0,1)[4] intercept   : AIC=800.656, Time=0.12 sec
 ARIMA(0,1,0)(0,0,0)[4]             : AIC=806.736, Time=0.02 sec
 ARIMA(2,1,2)(0,0,1)[4] intercept   : AIC=783.979, Time=0.37 sec
 ARIMA(2,1,2)(1,0,0)[4] intercept   : AIC=783.977, Time=0.35 sec
 ARIMA(2,1,2)(2,0,1)[4] intercept   : AIC=782.599, Time=0.66 sec
 ARIMA(2,1,2)(1,0,2)[4] intercept   : AIC=782.245, Time=0.62 sec
 ARIMA(2,1,2)(0,0,0)[4] intercept   : AIC=782.004, Time=0.21 sec
 ARIMA(2,1,2)(0,0,2)[4] intercept   : AIC=785.372, Time=0.45 sec
 ARIMA(2,1,2)(2,0,0)[4] intercept   : AIC=785.806, Time=0.52 sec
 ARIMA(2,1,2)(2,0,2)[4] intercept   : AIC=774.911, Time=0.68 sec
 ARIMA(1,1,2)(2,0,2)[4] intercept   : AIC=781.151, Time

In [None]:


# Forecast the 'Average' series explicitly, building it from historical_data
# instead of relying on whatever `ts` / `suburb` the per-suburb loop left behind.
# NOTE(review): the per-suburb loop iterates over every value of
# historical_data['Suburb']; if the 'Average' row is already in that frame this
# cell repeats work the loop has done — consider removing one of the two.
series_label = 'Average'
n_forecast_quarters = 20  # 5 years of quarterly steps
plot_dir = Path("../../Graphs/time_series_each_suburb")

average_rows = historical_data.loc[historical_data['Suburb'] == series_label].drop(columns=['Suburb'])
if average_rows.empty:
    raise ValueError(f"No '{series_label}' row found in historical_data")

# Column labels such as 'Mar-00' become the quarterly index of the series.
ts = average_rows.iloc[-1].astype(float).rename(series_label)
ts.index = pd.to_datetime(ts.index, format='%b-%y').to_period("Q")

# Fit auto_arima
model = pm.auto_arima(
    ts,
    seasonal=True,        # Quarterly data, so seasonality makes sense
    m=4,                  # 4 quarters in a year
    trace=True,           # Prints progress
    error_action='ignore',
    suppress_warnings=True
)

# Store the fitted model and its forecast under the same key.
results[series_label] = model

# Forecast next 20 quarters (5 years)
forecast_index = pd.period_range(ts.index[-1] + 1, periods=n_forecast_quarters, freq='Q')
# predict() may return a pandas Series with its own index; use the raw values
# so pd.Series attaches them by position instead of re-aligning by label
# (label re-alignment would produce NaN wherever the labels differ).
forecast_values = np.asarray(model.predict(n_periods=n_forecast_quarters), dtype=float)
forecast_series = pd.Series(forecast_values, index=forecast_index)

forecasts[series_label] = forecast_series

# Plot historical + forecast
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(ts.index.to_timestamp(), ts, label='Historical')
ax.plot(forecast_series.index.to_timestamp(), forecast_series, label='Forecast', linestyle='--')
ax.set_title(f"{series_label} Forecast for Next 5 Years")
ax.set_xlabel("Date")
ax.set_ylabel("Value")
ax.legend()
ax.grid(True)

# Save figure (create the target directory first; savefig will not).
plot_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(plot_dir / f"{series_label}_forecast.png")
plt.close(fig)