# Assignment 3

`"Divination is turning out to be much more trouble than I could have foreseen, never having studied the subject myself."
-J.K. Rowling, Harry Potter and the Half-Blood Prince`

---

## Libraries

#### IMPORTANT NOTES:

- Use the `display` function from `IPython.display` for printing pandas dataframes.

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from IPython.display import display
from tsa_functions import *

register_matplotlib_converters()
sns.set_style('darkgrid')

np.set_printoptions(precision=4)
pd.set_option('precision', 4)

import statsmodels.api as sm
from statsmodels.tsa.exponential_smoothing.ets import ETSModel
from statsmodels.tsa.arima.model import ARIMA

In [2]:
import warnings
warnings.filterwarnings("ignore")

from itertools import product
from IPython.display import clear_output
import tqdm

pd.set_option('display.max_colwidth', -1)

sm.__version__

'0.12.2'

In [3]:
def mae(y_true, y_pred):
    score = np.mean(np.abs(y_true - y_pred))
    return score


def rmse(y_true, y_pred):
    if len(y_true) != len(y_pred):
        raise ValueError('Lengths Mismatch')
    score = np.sqrt(np.mean((y_true - y_pred)**2))
    return score


def mase(y_true, y_pred, s_ts):
    if len(y_true) != len(y_pred):
        raise ValueError('Lengths Mismatch')
    ts = s_ts.to_numpy()
    score = np.mean(
        np.abs((y_true - y_pred)/np.mean(np.abs(ts[1:] - ts[:-1]))))
    return score


def rmsse(y_true, y_pred, ts):
    if len(y_true) != len(y_pred):
        raise ValueError('Lengths Mismatch')
    ts = ts.to_numpy()
    score = np.sqrt(
        np.mean(((y_true - y_pred)/np.mean(np.abs(ts[1:] - ts[:-1])))**2))
    return score


def rateMyForecast(train, test, forecast):
    """
    Evalute the forcast per group, given train, test, and forecast tables.

    The function evaluates the metrics per column of the provided table.

    Parameters
    ----------
    train : DataFrame
        DataFrame contaning the train set.
    test : DataFrame
        DataFrame contaning the train set.
    forecast : DataFrame
        DataFrame contaning the train set.

    Returns
    -------
    DataFrame
        DataFrame contaning the metrics as columns, groups as rows,
        and scores as values.

    """
    res = pd.DataFrame([
        {'Group': col,
         'RMSE': rmse(test[col], forecast[col]),
         'MAE': mae(test[col], forecast[col]),
         'MASE': mase(test[col], forecast[col], train[col]),
         'RMSSE': rmsse(test[col], forecast[col], train[col])}
        for col in test])
    display(res.set_index('Group'))
    return res.set_index('Group')


def timeSeriesSplit(design_set, h, val_size):
    val_end = len(design_set)
    divider = val_end - h
    dataset = []
    while len(design_set) - divider <= val_size:
        dataset.append(
            (design_set[np.arange(0, divider)],
             design_set[np.arange(divider, val_end)])
        )
        val_end -= 1
        divider -= 1
    return dataset[::-1]


def scorer(name, est, config, design_set, h, val_size):
    model_params = config.copy()
    fit_params = {}
#     if name == 'ETSModel':
#         fit_params['optimized'] = model_params.pop('optimized'),
#         fit_params['use_boxcox'] = model_params.pop('use_boxcox'),
#         fit_params['remove_bias'] = model_params.pop('remove_bias')

    dataset = timeSeriesSplit(design_set, h, val_size)
    scores = []
    fit = np.nan
    for X, y in dataset:
        try:
            model = est(X, **model_params)
            fit = model.fit(**fit_params)
            y_hat = fit.forecast(h)
            scores.append(rmse(y, y_hat))
        except:
            scores.append(np.nan)

    res = {
        'Model': name,
        'Config': config,
        'Average RMSE': np.nanmean(scores),
        'Stdev RMSE': np.nanstd(scores)}
    res['Sum'] = res['Average RMSE'] + res['Stdev RMSE']
    return res


def bestModel(design_set):

    ########################
    ## Parameter Building ##
    ########################

    seasonal_periods = 4
    h = 8
    val_size = 16

    ets_param_grid = {
        'error': ['add', 'mul'],
        'trend': ['add', 'mul', None],
        'damped_trend': [True, False],
        'seasonal': ['add', 'mul', None],
        'seasonal_periods': [seasonal_periods],
        'initialization_method': ['estimated', 'heuristic', 'known'],
        #         'optimized': [True],
        #         'use_boxcox': [True, False],
        #         'remove_bias': [True, False]

    }
    arm_param_grid = {
        'order': list(product([0, 1, 2, 4], range(0, 3), range(0, 3))),
        'trend': ['n', 'c', 't', 'ct'],
    }

    ets_param_dict = {params: dict(zip(ets_param_grid.keys(), params))
                      for params
                      in product(*ets_param_grid.values())}
    arm_param_dict = {params: dict(zip(arm_param_grid.keys(), params))
                      for params
                      in product(*arm_param_grid.values())}

    ####################
    ## Scoring Proper ##
    ####################

    scores = []
    for name, config in ets_param_dict.items():
        scores.append(
            scorer('ETSModel', ETSModel, config, design_set, h, val_size))
    for name, config in arm_param_dict.items():
        scores.append(
            scorer('ARIMA', ARIMA, config, design_set, h, val_size))

    return pd.DataFrame(scores)


def forecastUsingConfig(regions, design_set, test_set):
    forecast = {}
    for region in regions:
        if region['Model'] == 'ARIMA':
            model = ARIMA(design_set[region['Region']], **region['Config'])
        else:
            model = ETSModel(design_set[region['Region']], **region['Config'])
        fit = model.fit()
        forecast[region['Region']] = fit.forecast(len(df_test))
    forecast_set = pd.DataFrame(forecast)
    forecast_set.index = test_set.index
    return forecast_set

---

## Forecasting Australian Domestic Tourism - REDUX

For this homework, we'll be continuing our exploration of forecasting hierarchical and grouped time series.

Load the Australian domestic tourism dataset and perform the following:

- Recode the `State` variable, similar to what we did in the discussion notebook.

- Create a hierarchical time series with overnight trips in regions at the bottom level of the hierarchy (which can then be aggregated to states, then aggregated to the national `Total`), similar to what we did in the discussion notebook.

- Re-index the series using `pd.period_range` instead of the strings in `Quarter`.

- Withhold the last two years (8 quarters) as a test set. 

- Withhold the last four years (16 quarters) in the training set as a validation set.

In [4]:
state_dict = {
    'New South Wales': 'NSW',
    'Northern Territory': 'NT',
    'Queensland': 'QLD',
    'South Australia': 'SA',
    'Tasmania': 'TAS',
    'Victoria': 'VIC',
    'Western Australia': 'WA'}
df = pd.read_csv('special_datasets/tourism.csv')
df.State.replace(state_dict, inplace=True)

df.index = pd.DatetimeIndex(df.Quarter).to_period('Q')
# df['Year'] = pd.to_datetime(df.Quarter).dt.year
# df['Quarter'] = pd.to_datetime(df.Quarter).dt.quarter

df_pivot = df.pivot_table(
    values='Trips',
    index=df.index,
    columns=['State', 'Region'],
    aggfunc=np.sum)

df_design = df_pivot.loc[: '2015Q4']
df_train = df_pivot.loc[: '2013Q4']
df_val = df_pivot.loc['2012Q1': '2015Q4']
df_test = df_pivot.loc['2016Q1':]
display(df_train, df_val, df_test)

State,ACT,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,...,VIC,VIC,VIC,VIC,VIC,WA,WA,WA,WA,WA
Region,Canberra,Blue Mountains,Capital Country,Central Coast,Central NSW,Hunter,New England North West,North Coast NSW,Outback NSW,Riverina,...,Phillip Island,Spa Country,Upper Yarra,Western Grampians,Wimmera,Australia's Coral Coast,Australia's Golden Outback,Australia's North West,Australia's South West,Experience Perth
Quarter,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1998Q1,551.0019,195.5432,261.8081,455.1668,424.3954,674.9785,329.8385,1342.3012,96.6027,215.2689,...,364.0433,86.1960,102.7910,86.9966,18.8047,132.5164,161.7269,120.7755,474.8587,751.2120
1998Q2,416.0256,200.4087,238.6000,343.4768,557.0116,791.8872,328.3660,1161.1086,120.6484,336.0949,...,166.2436,74.3569,74.8551,84.9400,52.4823,172.6154,164.9738,158.4044,411.6223,668.7107
1998Q3,436.0290,253.3597,184.4238,303.6709,555.5280,566.0406,324.8592,839.3681,110.0738,228.0654,...,83.0328,60.7785,59.4654,79.9749,35.6576,173.9043,206.8799,184.6190,360.0397,662.8507
1998Q4,449.7984,245.5738,229.0889,331.8171,590.1584,591.5689,279.7962,1183.8600,185.4700,201.1235,...,215.6779,46.0133,35.2389,116.2356,27.2045,207.0026,198.5096,138.8783,462.6201,832.7065
1999Q1,378.5728,290.4841,252.2820,466.8156,480.1293,785.1028,327.4919,1306.8775,78.3638,200.8224,...,263.7101,65.2992,67.8235,101.7656,50.2199,198.8566,140.2134,103.3371,562.9746,830.3057
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012Q4,511.7661,131.0548,287.3246,340.7241,505.6835,795.4073,350.3062,1199.5305,62.8140,191.4725,...,169.6084,55.4732,72.0509,89.8130,43.1522,152.1080,184.3617,137.4486,547.8138,778.8894
2013Q1,524.5536,193.2221,304.0950,377.2229,363.1995,711.2954,314.9118,1234.2089,45.9733,182.3122,...,246.1275,39.6718,71.5825,59.2859,24.5524,171.1355,156.8752,85.5274,583.7245,816.7590
2013Q2,475.5325,181.7916,205.4075,346.6879,560.3985,680.0313,268.2175,936.4144,104.1240,271.8371,...,188.9554,68.3296,48.1736,74.2560,36.4962,168.1334,193.9896,161.4666,538.9682,836.0121
2013Q3,506.5121,197.8199,248.7887,258.9110,404.9852,697.9204,299.0346,1011.6567,106.6543,205.5485,...,117.3503,66.1651,22.0000,58.3112,15.1660,180.1881,166.2001,227.2310,419.9423,750.4847


State,ACT,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,...,VIC,VIC,VIC,VIC,VIC,WA,WA,WA,WA,WA
Region,Canberra,Blue Mountains,Capital Country,Central Coast,Central NSW,Hunter,New England North West,North Coast NSW,Outback NSW,Riverina,...,Phillip Island,Spa Country,Upper Yarra,Western Grampians,Wimmera,Australia's Coral Coast,Australia's Golden Outback,Australia's North West,Australia's South West,Experience Perth
Quarter,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2012Q1,446.9046,184.052,246.4256,359.6829,423.284,703.6991,337.4748,1140.2143,38.996,183.6027,...,315.5053,78.7224,60.24,55.7749,19.5331,168.1854,137.1244,81.1448,601.0634,735.495
2012Q2,456.802,259.4219,294.3512,248.5392,526.6296,601.2917,329.1314,923.79,80.2753,217.9396,...,147.4012,79.3044,68.1378,59.7606,34.6162,187.2203,172.0034,190.0116,413.7076,656.6739
2012Q3,521.8367,190.1276,216.0869,309.3167,394.8585,656.9761,340.8776,971.2369,71.9047,176.5125,...,126.0946,57.1883,48.5398,60.6183,32.752,165.5378,188.7788,184.0548,320.77,651.6007
2012Q4,511.7661,131.0548,287.3246,340.7241,505.6835,795.4073,350.3062,1199.5305,62.814,191.4725,...,169.6084,55.4732,72.0509,89.813,43.1522,152.108,184.3617,137.4486,547.8138,778.8894
2013Q1,524.5536,193.2221,304.095,377.2229,363.1995,711.2954,314.9118,1234.2089,45.9733,182.3122,...,246.1275,39.6718,71.5825,59.2859,24.5524,171.1355,156.8752,85.5274,583.7245,816.759
2013Q2,475.5325,181.7916,205.4075,346.6879,560.3985,680.0313,268.2175,936.4144,104.124,271.8371,...,188.9554,68.3296,48.1736,74.256,36.4962,168.1334,193.9896,161.4666,538.9682,836.0121
2013Q3,506.5121,197.8199,248.7887,258.911,404.9852,697.9204,299.0346,1011.6567,106.6543,205.5485,...,117.3503,66.1651,22.0,58.3112,15.166,180.1881,166.2001,227.231,419.9423,750.4847
2013Q4,529.5845,161.3945,301.8307,319.046,441.1772,659.9071,288.0075,1114.8221,72.2101,195.8645,...,185.3366,72.4729,31.6343,101.5272,48.0526,132.3641,131.349,117.5055,437.58,764.8393
2014Q1,540.6075,179.7695,267.6407,414.3682,339.3733,801.2537,344.7888,1301.4334,47.1369,217.1528,...,326.7374,108.1304,71.0182,50.4544,21.1087,259.2209,257.6857,250.4867,945.588,1077.166
2014Q2,586.8275,264.5403,242.883,296.1988,472.07,692.4411,291.2233,1174.7376,99.9929,217.3002,...,164.107,71.9461,45.4003,88.9758,30.0972,245.3316,213.512,278.0264,761.1792,1085.9378


State,ACT,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,...,VIC,VIC,VIC,VIC,VIC,WA,WA,WA,WA,WA
Region,Canberra,Blue Mountains,Capital Country,Central Coast,Central NSW,Hunter,New England North West,North Coast NSW,Outback NSW,Riverina,...,Phillip Island,Spa Country,Upper Yarra,Western Grampians,Wimmera,Australia's Coral Coast,Australia's Golden Outback,Australia's North West,Australia's South West,Experience Perth
Quarter,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2016Q1,625.1416,227.4607,306.4441,398.0859,508.685,842.8019,321.2152,1560.82,65.6794,238.9226,...,327.287,91.9398,38.0968,95.9714,42.6252,284.5467,278.3108,209.7269,982.0733,1116.5143
2016Q2,592.6085,303.5584,322.5047,250.956,461.0432,829.9771,392.0172,1153.6977,134.8313,279.9532,...,168.653,82.1689,63.6975,49.5275,24.4022,279.9112,288.9768,289.8143,650.1197,960.5722
2016Q3,572.4371,211.0181,262.9282,265.4521,505.2368,801.0572,350.3716,1038.7536,141.2427,254.5432,...,145.2257,77.7021,52.0486,70.2469,24.4363,254.0475,271.1387,424.9869,531.2379,835.6335
2016Q4,667.2141,220.1716,277.333,379.4839,525.2546,843.7873,346.6872,1407.1824,105.9467,230.6681,...,189.9613,114.6199,59.6499,102.3744,25.7099,256.8122,269.9291,316.2068,755.0863,1058.2963
2017Q1,634.3687,190.4739,300.0212,455.0586,529.9431,757.0282,358.0866,1409.7407,91.137,182.6422,...,339.8957,87.9224,114.1359,92.9616,62.9338,241.9544,225.9662,226.5482,919.8692,956.5737
2017Q2,748.2904,315.7889,305.7111,278.4784,651.3597,897.9597,405.9132,1164.0613,107.5434,293.565,...,196.4758,56.1299,93.9915,71.5968,41.8446,223.8558,256.354,285.7901,764.1528,908.3352
2017Q3,631.7599,314.6307,287.2512,272.4384,628.5964,914.8204,398.9737,1145.6544,162.5924,213.9023,...,156.0623,100.1157,39.8043,56.8456,62.5967,282.3629,306.1539,352.1124,547.0572,1006.2687
2017Q4,720.3294,203.4606,356.3841,427.7658,628.2281,1004.3339,404.1934,1329.1106,106.3099,218.1398,...,238.1329,95.3488,75.0986,119.9106,45.15,274.7947,223.7437,288.6869,745.9721,1102.5569


### Q1.

Given that:

1. Our goal is to create an 8-step forecasting model.

2. We have allocated 16 observations for the validation set. 

How many train-val splits will we end up with under TSCV?

In [5]:
a = np.zeros((9, 16))
for i, arr in enumerate(a):
    arr[0:i+8] = 1
    arr[:i] = -1
df_0 = pd.DataFrame(a.astype(int))
df_0.columns = range(-16, 0)
df_0.index = range(1, 10)
df_0.style.background_gradient(cmap='coolwarm', axis=None)

Unnamed: 0,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1
1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0
2,-1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0
3,-1,-1,1,1,1,1,1,1,1,1,0,0,0,0,0,0
4,-1,-1,-1,1,1,1,1,1,1,1,1,0,0,0,0,0
5,-1,-1,-1,-1,1,1,1,1,1,1,1,1,0,0,0,0
6,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,0,0,0
7,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,0,0
8,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,0
9,-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1


### Answer.

`9 train-val splits`

### Q2.

Extract the `Canberra` time series.

Read the `statsmodels` API entries on [`ETS`](https://www.statsmodels.org/stable/generated/statsmodels.tsa.exponential_smoothing.ets.ETSModel.html) and [`ARIMA`](https://www.statsmodels.org/stable/generated/statsmodels.tsa.arima.model.ARIMA.html) to determine the different hyperparameters/configurations that you can select. 

Recall that `ARIMA` models have certain [assumptions](https://otexts.com/fpp3/stationarity.html).

Using TSCV on the validation set, find the best performing `ETS` and `ARIMA` model based on the sum of their average and stdev. `RMSE` values.

Compile the `RMSE` statistics of both models in a dataframe and print it (i.e. avg, stdev, avg + stdev).

In [6]:
# Your code here
res = bestModel(df_design[('ACT', 'Canberra')])

In [8]:
(res
 .groupby('Model', as_index=False)
 .apply(lambda x: x.iloc[x.Sum.argmin()])
)

Unnamed: 0,Model,Config,Average RMSE,Stdev RMSE,Sum
0,ARIMA,"{'order': (1, 0, 0), 'trend': 't'}",44.0876,12.9451,57.0327
1,ETSModel,"{'error': 'add', 'trend': 'mul', 'damped_trend': False, 'seasonal': None, 'seasonal_periods': 4, 'initialization_method': 'heuristic'}",55.0465,12.3809,67.4274


### Q3.

Choose the best model from the two above. Evaluate it on the `Canberra` test set.

Compile the `MAE`, `RMSE`, `MASE`, and `RMSSE` metrics in a dataframe and print it.

In [9]:
regions = [{
    'Region': ('ACT', 'Canberra') ,
    **res.loc[res['Sum'].idxmin()].to_dict()}]
df_forecast = forecastUsingConfig(regions, df_design, df_test)

In [10]:
rateMyForecast(
    df_design[[('ACT', 'Canberra')]],
    df_test[[('ACT', 'Canberra')]],
    df_forecast[[('ACT', 'Canberra')]]);

Unnamed: 0_level_0,RMSE,MAE,MASE,RMSSE
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"(ACT, Canberra)",51.3659,39.9172,0.6221,0.8006


### Q4.

Repeat the process done in Q2 and Q3 <b><u>for all regions</b></u>.

Only print out the final dataframe. Sample shown below.

In [12]:
regions = []
for label, content in tqdm.tqdm(list(df_design.items())):
    regions.append({
        'Region': label,
        **bestModel(content).nsmallest(1, 'Sum').iloc[0].to_dict()
    })

100%|██████████| 76/76 [2:51:39<00:00, 135.53s/it]  


In [13]:
pd.DataFrame(regions)

Unnamed: 0,Region,Model,Config,Average RMSE,Stdev RMSE,Sum
0,"(ACT, Canberra)",ARIMA,"{'order': (1, 0, 0), 'trend': 't'}",44.0876,12.9451,57.0327
1,"(NSW, Blue Mountains)",ETSModel,"{'error': 'mul', 'trend': 'add', 'damped_trend': False, 'seasonal': 'add', 'seasonal_periods': 4, 'initialization_method': 'heuristic'}",34.5277,7.9519,42.4797
2,"(NSW, Capital Country)",ARIMA,"{'order': (1, 0, 2), 'trend': 't'}",33.1587,4.6608,37.8195
3,"(NSW, Central Coast)",ARIMA,"{'order': (2, 0, 2), 'trend': 'c'}",33.8040,3.6350,37.4390
4,"(NSW, Central NSW)",ETSModel,"{'error': 'mul', 'trend': None, 'damped_trend': False, 'seasonal': 'add', 'seasonal_periods': 4, 'initialization_method': 'estimated'}",56.8041,5.5173,62.3214
...,...,...,...,...,...,...
71,"(WA, Australia's Coral Coast)",ARIMA,"{'order': (0, 0, 2), 'trend': 't'}",64.7244,15.7177,80.4421
72,"(WA, Australia's Golden Outback)",ARIMA,"{'order': (0, 0, 2), 'trend': 't'}",64.0100,9.6084,73.6184
73,"(WA, Australia's North West)",ETSModel,"{'error': 'mul', 'trend': 'mul', 'damped_trend': False, 'seasonal': 'add', 'seasonal_periods': 4, 'initialization_method': 'heuristic'}",55.2003,18.4021,73.6024
74,"(WA, Australia's South West)",ARIMA,"{'order': (4, 0, 2), 'trend': 't'}",166.2667,55.0986,221.3654


In [19]:
df_forecast = forecastUsingConfig(regions, df_design, df_test)

In [54]:
rms_region = rateMyForecast(
    df_design,
    df_test,
    df_forecast)
clear_output()
rms_region['Best Model'] = [d['Model'] + str(d['Config']) for d in regions]
rms_region.sort_index(axis=1)

Unnamed: 0_level_0,Best Model,MAE,MASE,RMSE,RMSSE
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(ACT, Canberra)","ARIMA{'order': (1, 0, 0), 'trend': 't'}",39.9172,0.6221,51.3659,0.8006
"(NSW, Blue Mountains)","ETSModel{'error': 'mul', 'trend': 'add', 'damped_trend': False, 'seasonal': 'add', 'seasonal_periods': 4, 'initialization_method': 'heuristic'}",30.8866,0.8423,35.6142,0.9712
"(NSW, Capital Country)","ARIMA{'order': (1, 0, 2), 'trend': 't'}",34.3025,0.7922,42.7229,0.9867
"(NSW, Central Coast)","ARIMA{'order': (2, 0, 2), 'trend': 'c'}",35.4584,0.4389,43.4205,0.5375
"(NSW, Central NSW)","ETSModel{'error': 'mul', 'trend': None, 'damped_trend': False, 'seasonal': 'add', 'seasonal_periods': 4, 'initialization_method': 'estimated'}",83.4478,1.3060,96.6310,1.5123
...,...,...,...,...,...
"(WA, Australia's Coral Coast)","ARIMA{'order': (0, 0, 2), 'trend': 't'}",37.0062,1.1588,41.6100,1.3030
"(WA, Australia's Golden Outback)","ARIMA{'order': (0, 0, 2), 'trend': 't'}",32.6179,1.1410,40.5184,1.4173
"(WA, Australia's North West)","ETSModel{'error': 'mul', 'trend': 'mul', 'damped_trend': False, 'seasonal': 'add', 'seasonal_periods': 4, 'initialization_method': 'heuristic'}",216.3964,4.1747,235.4822,4.5429
"(WA, Australia's South West)","ARIMA{'order': (4, 0, 2), 'trend': 't'}",86.1880,0.8297,105.4631,1.0152


### Q5.

Produce aggregate forecasts for the `State` and `Total` levels using the regional forecasts (i.e. bottom-up).

Calculate the average `MAE`, `RMSE`, `MASE`, and `RMSSE` across all series (i.e. `Region`, `State`, `Total`).

In [63]:
display(df_forecast.sum(level=0, axis=1))
rms_state = rateMyForecast(
    df_train.sum(level=0, axis=1),
    df_test.sum(level=0, axis=1),
    df_forecast.sum(level=0, axis=1))

Unnamed: 0_level_0,ACT,NSW,NT,QLD,SA,TAS,VIC,WA
Quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016Q1,604.2125,7826.41,224.1544,5004.8305,1685.8357,934.0161,5971.5984,2783.8184
2016Q2,611.2139,7327.2933,442.4822,5491.373,1536.7123,678.324,5190.6624,2897.3751
2016Q3,618.2485,7069.0324,601.6263,5834.4816,1431.5213,491.8589,4750.7931,2724.1374
2016Q4,625.3151,7600.4262,344.264,5426.4269,1598.054,693.7848,5351.9794,2919.2845
2017Q1,632.4125,7973.6466,227.3203,5167.6455,1720.8535,921.6617,5996.457,3110.639
2017Q2,639.5397,7447.2461,447.1472,5672.701,1564.964,672.0829,5243.7651,3092.1543
2017Q3,646.6956,7191.4208,608.0696,6021.3368,1458.6066,492.5985,4791.8063,3062.793
2017Q4,653.8791,7728.9161,348.4789,5600.0549,1625.4481,686.3289,5383.2495,3149.8696


Unnamed: 0_level_0,RMSE,MAE,MASE,RMSSE
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ACT,51.3659,39.9172,0.6211,0.7992
NSW,637.9279,569.0431,1.2632,1.4161
NT,90.272,66.0411,0.4155,0.5679
QLD,329.2314,238.9838,0.6201,0.8542
SA,133.3043,117.341,0.7238,0.8223
TAS,126.4874,114.8961,0.5063,0.5574
VIC,902.7367,805.475,1.2387,1.3883
WA,465.315,432.716,4.708,5.0626


In [61]:
df_forecast_total = df_forecast.sum(axis=1).rename('Total').to_frame()
display(df_forecast_total)
rms_total = rateMyForecast(
    df_train.sum(axis=1).rename('Total').to_frame(),
    df_test.sum(axis=1).rename('Total').to_frame(),
    df_forecast_total)

Unnamed: 0_level_0,Total
Quarter,Unnamed: 1_level_1
2016Q1,25034.8761
2016Q2,24175.4361
2016Q3,23521.6996
2016Q4,24559.5347
2017Q1,25750.636
2017Q2,24779.6002
2017Q3,24273.3272
2017Q4,25176.225


Unnamed: 0_level_0,RMSE,MAE,MASE,RMSSE
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Total,1656.7145,1490.3895,1.3834,1.5378


<div class="alert alert-block alert-info">
Calculating the average for all 85 (1 Total + 8 States + 76 Regions) time series forecasts as done in the book. (https://otexts.com/fpp3/tourism.html)
</div>


In [76]:
(pd
 .concat([rms_region, rms_state, rms_total])
 .mean()
 .rename('All Series')
 .to_frame()
 .T)

Unnamed: 0,RMSE,MAE,MASE,RMSSE
All Series,97.2011,83.5789,0.8997,1.0778
