# Time Series Analysis: Midterm Exam

`MENE, MENE, TEKEL, UPHARSIN.
-The Book of Daniel, Chapter 5: Belshazzar's Feast`

---

## Libraries

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import lightgbm as lgb
from sklearn.multioutput import MultiOutputRegressor
from pandas.plotting import register_matplotlib_converters
from IPython.display import display
from tsa_functions import *

# Register pandas' date/period converters with matplotlib and pick the
# seaborn theme used by every plot in the notebook.
register_matplotlib_converters()
sns.set_style('darkgrid')

# Print two decimal places in both NumPy and pandas output.
np.set_printoptions(precision=2)
# Use the fully-qualified option name: the bare pattern 'precision' matches
# more than one option on pandas >= 1.4 and raises OptionError there.
pd.set_option('display.precision', 2)

In [3]:
import json
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
import tqdm
from IPython.display import clear_output
from itertools import product
import warnings


# Suppress every warning for the rest of the session.
warnings.filterwarnings(action="ignore")

# Lift pandas' display truncation limits so whole tables are rendered.
for _option in ('display.max_columns',
                'display.max_rows',
                'display.max_colwidth'):
    pd.set_option(_option, None)

In [62]:
def mae(y_true, y_pred):
    """
    Compute the mean absolute error (MAE) of a forecast.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Forecast values; must support ``y_true - y_pred``.

    Returns
    -------
    float
        Mean of the absolute differences between `y_true` and `y_pred`.

    """
    abs_errors = np.abs(y_true - y_pred)
    return np.mean(abs_errors)


def rmse(y_true, y_pred):
    """
    Compute the root mean squared error (RMSE) of a forecast.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Forecast values, with the same length as `y_true`.

    Returns
    -------
    float
        Square root of the mean squared difference.

    Raises
    ------
    ValueError
        If `y_true` and `y_pred` have different lengths.

    """
    if len(y_true) != len(y_pred):
        raise ValueError('Lengths Mismatch')
    mean_squared_error = np.mean(np.square(y_true - y_pred))
    return np.sqrt(mean_squared_error)


def mase(y_true, y_pred, s_ts):
    """
    Compute the mean absolute scaled error (MASE) of a forecast.

    Errors are scaled by the mean absolute one-step difference of `s_ts`,
    i.e. the in-sample MAE of a one-step naive forecast.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Forecast values, with the same length as `y_true`.
    s_ts : Series
        Series used to compute the scale; must provide ``to_numpy()``.

    Returns
    -------
    float
        Mean of the absolute scaled errors.

    Raises
    ------
    ValueError
        If `y_true` and `y_pred` have different lengths.

    """
    if len(y_true) != len(y_pred):
        raise ValueError('Lengths Mismatch')
    history = s_ts.to_numpy()
    one_step_changes = history[1:] - history[:-1]
    scale = np.mean(np.abs(one_step_changes))
    scaled_errors = (y_true - y_pred) / scale
    return np.mean(np.abs(scaled_errors))


def rmsse(y_true, y_pred, ts):
    """
    Compute the root mean squared scaled error (RMSSE) of a forecast.

    Squared forecast errors are scaled by the mean *squared* one-step
    difference of `ts` (the in-sample MSE of a one-step naive forecast),
    which is the standard RMSSE definition.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Forecast values, with the same length as `y_true`.
    ts : array-like
        Series used to compute the scale (a pandas Series or anything
        ``np.asarray`` can convert to a numeric array).

    Returns
    -------
    float
        Square root of the mean squared error divided by the naive
        one-step mean squared error of `ts`.

    Raises
    ------
    ValueError
        If `y_true` and `y_pred` have different lengths.

    """
    if len(y_true) != len(y_pred):
        raise ValueError('Lengths Mismatch')
    history = np.asarray(ts, dtype=float)
    # Scale with the squared naive errors; scaling with the absolute ones
    # (as MASE does) would not yield the RMSSE.
    naive_mse = np.mean(np.diff(history) ** 2)
    score = np.sqrt(np.mean((y_true - y_pred) ** 2) / naive_mse)
    return score


def rateMyForecast(train, test, forecast):
    """
    Evaluate the forecast per group, given train, test, and forecast tables.

    For every column of `test`, the MAE and RMSE between that column and
    the column of the same name in `forecast` are computed. The resulting
    table is displayed and returned.

    Parameters
    ----------
    train : DataFrame
        DataFrame containing the train set. Not used by the metrics
        currently computed.
    test : DataFrame
        DataFrame containing the test set (the observed values).
    forecast : DataFrame
        DataFrame containing the forecasts; must have every column of
        `test`.

    Returns
    -------
    DataFrame
        DataFrame containing the metrics as columns, groups as rows,
        and scores as values.

    """
    records = []
    for col in test:
        actual = test[col]
        predicted = forecast[col]
        records.append({
            'Group': col,
            'MAE': mae(actual, predicted),
            'RMSE': rmse(actual, predicted)})
    res = pd.DataFrame(records)
    display(res.set_index('Group'))
    return res.set_index('Group')


def TimeseriesGenerator(X, y, w, h):
    """
    Build direct-forecasting training windows and the final test window.

    Every training sample pairs `w` consecutive observations of `X` with
    the `h` observations that immediately follow them. The test sample is
    the last `w` observations of `X`, paired with all of `y`.

    Parameters
    ----------
    X : array-like
        Training series, ordered in time along its first axis.
    y : array-like
        Held-out observations that follow `X`.
    w : int
        Lookback window length (number of input steps).
    h : int
        Forecast horizon (number of output steps).

    Returns
    -------
    X_train : ndarray
        Lookback windows, shape ``(n, w, ...)`` with
        ``n = len(X) - w - h + 1``.
    X_test : ndarray
        Last `w` observations of `X` with a leading axis of length 1.
    y_train : ndarray
        Target windows aligned with `X_train`, shape ``(n, h, ...)``.
    y_test : ndarray
        `y` with a leading axis of length 1.

    Raises
    ------
    ValueError
        If `w` or `h` is smaller than 1, or `X` holds fewer than
        ``w + h`` observations.

    """
    if w < 1 or h < 1:
        raise ValueError('w and h must both be at least 1')
    series = np.asarray(X)
    n_windows = len(series) - w - h + 1
    if n_windows < 1:
        raise ValueError(
            f'Need at least w + h = {w + h} observations, '
            f'got {len(series)}')
    # Plain NumPy fancy indexing is used so that the number of windows is
    # not limited by the batch size of a dataset object.
    starts = np.arange(n_windows)[:, None]
    X_train = series[starts + np.arange(w)]
    y_train = series[starts + w + np.arange(h)]
    X_test = series[None, -w:]
    y_test = np.asarray(y)[None, :]
    return X_train, X_test, y_train, y_test


def cross_val_score(X, est, config, scoring, cv):
    """
    Score an estimator on every time-series fold of `X`.

    Parameters
    ----------
    X : array-like
        Series handed to ``cv.split``.
    est : estimator
        Object providing ``set_params``, ``fit`` and ``predict``.
    config : dict
        Must contain ``'h'`` (horizon) and ``'w'`` (lookback); every other
        entry is passed to ``est.set_params``. The dict is not modified.
    scoring : dict
        Maps a metric name to a callable ``metric(y_true, y_pred)``.
    cv : splitter
        Object whose ``split(X, h)`` yields ``(train, val)`` pairs.

    Returns
    -------
    dict
        Maps each metric name to the list of its per-fold scores.

    """
    est_params = config.copy()
    h = est_params.pop('h')
    w = est_params.pop('w')
    scores = {name: [] for name in scoring}
    for train, val in cv.split(X, h):
        X_train, X_test, y_train, y_test = TimeseriesGenerator(
            train, val, w, h)
        est.set_params(**est_params)
        est.fit(X_train, y_train)
        y_hat = est.predict(X_test)
        for name, fold_scores in scores.items():
            fold_scores.append(scoring[name](y_test, y_hat))
    return scores


def cross_val_predict(X, est, config, cv):
    """
    Produce one out-of-sample forecast per time-series fold of `X`.

    Two kinds of estimators are supported, selected by the ``'w'`` entry
    of `config`:

    * ``'w'`` present and truthy: `est` is an already-built regressor with
      ``set_params``/``fit``/``predict``; it is trained on lookback
      windows of the fold's training part.
    * ``'w'`` missing or falsy: `est` is called as ``est(train, **param)``
      and the result must provide ``fit()`` returning an object with
      ``forecast(h)``.
      NOTE(review): presumably a statsmodels-style model class; confirm.

    Parameters
    ----------
    X : array-like
        Series handed to ``cv.split``.
    est : estimator or callable
        See above.
    config : dict
        Must contain ``'h'`` (horizon); may contain ``'w'`` (lookback).
        Remaining entries are estimator/model parameters. The dict is not
        modified.
    cv : splitter
        Object whose ``split(X, h)`` yields ``(train, val)`` pairs.

    Returns
    -------
    dict
        Maps the fold number (starting at 0) to that fold's forecast. In
        the model-class branch a fold whose model fails is filled with
        ``len(val)`` NaN values.

    """
    param = config.copy()
    h = param.pop('h')
    w = param.pop('w', None)
    res = {}
    for k, (train, val) in enumerate(cv.split(X, h)):
        if w:
            X_train, X_test, y_train, _ = TimeseriesGenerator(
                train, val, w, h)
            est.set_params(**param)
            est.fit(X_train, y_train)
            y_hat = est.predict(X_test)[0]
        else:
            try:
                # Fit on this fold's training part only; fitting on the
                # whole of X would leak the validation observations.
                y_hat = est(train, **param).fit().forecast(h)
            except Exception:
                # Best effort: a model that cannot be fit or forecast on
                # this fold contributes NaNs instead of aborting the run.
                y_hat = np.full(len(val), np.nan)
        res[k] = y_hat
    return res

class TimeSeriesSplit:
    """
    Expanding-window splitter for time-series cross-validation.

    The last `val_size` observations of a series form the validation
    region. Each fold validates on `h` consecutive observations inside
    that region and trains on everything before them.

    Parameters
    ----------
    val_size : int
        Number of trailing observations reserved for validation.

    """

    def __init__(self, val_size):
        self.val_size = val_size

    def split(self, design_set, h):
        """
        Split `design_set` into ``(train, val)`` folds.

        Parameters
        ----------
        design_set : array-like
            Series to split; must support ``len()`` and indexing with an
            integer array.
        h : int
            Length of every validation window (the forecast horizon).

        Returns
        -------
        list of tuple
            ``(train, val)`` pairs ordered from the earliest validation
            window to the latest. With enough history there are
            ``val_size - h + 1`` folds. Folds that would leave an empty
            training set are skipped, and the list is empty when
            ``h > val_size``.

        """
        n_obs = len(design_set)
        # `offset` is the distance from the start of a validation window
        # to the end of the series. Capping it at n_obs - 1 keeps at least
        # one training observation and prevents negative (wrap-around)
        # indices when val_size is not smaller than the series.
        max_offset = min(self.val_size, n_obs - 1)
        folds = []
        for offset in range(max_offset, h - 1, -1):
            divider = n_obs - offset
            folds.append(
                (design_set[np.arange(0, divider)],
                 design_set[np.arange(divider, divider + h)]))
        return folds


class GridSearchCV:
    """
    Exhaustive search over a parameter grid with time-series CV.

    Parameters
    ----------
    estimator : estimator or callable
        Passed unchanged to ``cross_val_score`` / ``cross_val_predict``.
    param_grid : dict
        Maps a parameter name to the iterable of values to try. The
        Cartesian product of all values is stored in ``param_list``.
    cv : splitter
        Object providing ``split(X, h)``.
    scoring : dict
        Maps a metric name to a callable; only used when fitting with
        ``scores=True``, which additionally requires an ``'rmse'`` entry.

    """

    def __init__(self, estimator, param_grid, cv, scoring):
        self.est = estimator
        self.param_grid = param_grid
        names = param_grid.keys()
        self.param_list = []
        for combination in product(*param_grid.values()):
            self.param_list.append(dict(zip(names, combination)))
        self.cv = cv
        self.scoring = scoring

    def fit(self, X, scores=False):
        """
        Evaluate every parameter combination on `X`.

        Parameters
        ----------
        X : array-like
            Series to cross-validate on.
        scores : bool, default False
            If True, collect per-fold scores and summarise the RMSE per
            combination (lookback, horizon, average, standard deviation
            and their sum). If False, collect the per-fold forecasts.

        Notes
        -----
        Sets ``cv_results_`` (raw result per combination),
        ``df_records_`` (one table row per combination) and ``df`` (the
        rows as a DataFrame).

        """
        self.cv_results_ = []
        self.df_records_ = []
        for param in tqdm.tqdm(self.param_list):
            res = {'params': param.copy()}
            if scores:
                res.update(cross_val_score(
                    X, self.est, param, self.scoring, self.cv))
                rmse_mean = np.mean(res['rmse'])
                rmse_std = np.std(res['rmse'])
                rec = {
                    'Lookback': res['params']['w'],
                    'Horizon': res['params']['h'],
                    'Average RMSE': rmse_mean,
                    'Stdev RMSE': rmse_std,
                    'Sum': rmse_mean + rmse_std}
            else:
                res.update(cross_val_predict(
                    X, self.est, param, self.cv))
                # The forecasts themselves are the table row.
                rec = res
            self.cv_results_.append(res)
            self.df_records_.append(rec)
        self.df = pd.DataFrame(self.df_records_)
                

def forecastUsingConfig(est, regions, design_set, test_set):
    """
    Forecast every region with its own lookback and horizon.

    Parameters
    ----------
    est : estimator
        Object whose ``fit(X, y)`` returns something providing
        ``predict``. It is refit for every region with whatever
        parameters it already has.
    regions : iterable of mapping
        One entry per region with the keys ``'Region'`` (column key in
        `design_set` and `test_set`), ``'Lookback'`` and ``'Horizon'``.
    design_set : DataFrame
        Training data, one column per region.
    test_set : DataFrame
        Held-out data, one column per region.

    Returns
    -------
    DataFrame
        Forecasts, one column per region, indexed like `test_set`.

    """
    forecast = {}
    for region in regions:
        region_train = design_set[region['Region']]
        region_test = test_set[region['Region']]
        lookback = int(region['Lookback'])
        horizon = int(region['Horizon'])
        X_train, X_test, y_train, _ = TimeseriesGenerator(
            region_train, region_test, lookback, horizon)
        fitted = est.fit(X_train, y_train)
        # predict returns one row per test window; there is exactly one.
        forecast[region['Region']] = fitted.predict(X_test)[0]
    forecast_set = pd.DataFrame(forecast)
    forecast_set.index = test_set.index
    return forecast_set

---

## Forecasting Australian Domestic Tourism using Machine Learning 

Suppose that you've been hired by the Australian government to forecast the quarterly domestic tourism demand for the entire country.

Your goal is to generate forecasts using `LightGBM` models.

---

Load the Australian domestic tourism dataset and perform the following:

- Recode the `State` variable, similar to what we did in the discussion notebook.

- Create a hierarchical time series with overnight trips in regions at the bottom level of the hierarchy (which can then be aggregated to states, then aggregated to the national `Total`), similar to what we did in the previous notebooks.

- Re-index the series using `pd.period_range` instead of the strings in `Quarter`.

- Withhold the last two years (8 quarters) as a test set. 

- Withhold the last four years (16 quarters) in the training set as a validation set.

In [14]:
# Abbreviations used to recode the `State` column.
state_dict = {
    'New South Wales': 'NSW',
    'Northern Territory': 'NT',
    'Queensland': 'QLD',
    'South Australia': 'SA',
    'Tasmania': 'TAS',
    'Victoria': 'VIC',
    'Western Australia': 'WA'}
df = pd.read_csv('special_datasets/tourism.csv')
# Assign the recoded column back: an in-place replace through attribute
# access (`df.State.replace(..., inplace=True)`) works on a temporary
# object and does not update `df` under pandas Copy-on-Write.
df['State'] = df['State'].replace(state_dict)

# Index the rows by quarterly periods parsed from the `Quarter` strings.
df.index = pd.DatetimeIndex(df.Quarter).to_period('Q')

# One column per (State, Region) pair holding the overnight trips.
df_pivot = df.pivot_table(
    values='Trips',
    index=df.index,
    columns=['State', 'Region'],
    aggfunc='sum')

# Design set = everything before the test set.
df_design = df_pivot.loc[: '2015Q4']
# NOTE(review): df_train ends at 2013Q4 while df_val starts at 2012Q1, so
# the two overlap on 2012Q1-2013Q4; confirm this is intended.
df_train = df_pivot.loc[: '2013Q4']
df_val = df_pivot.loc['2012Q1': '2015Q4']
df_test = df_pivot.loc['2016Q1':]
display(df_train, df_val, df_test)

State,ACT,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NT,NT,NT,NT,NT,NT,NT,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,SA,SA,SA,SA,SA,SA,SA,SA,SA,SA,SA,SA,TAS,TAS,TAS,TAS,TAS,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,WA,WA,WA,WA,WA
Region,Canberra,Blue Mountains,Capital Country,Central Coast,Central NSW,Hunter,New England North West,North Coast NSW,Outback NSW,Riverina,Snowy Mountains,South Coast,Sydney,The Murray,Alice Springs,Barkly,Darwin,Kakadu Arnhem,Katherine Daly,Lasseter,MacDonnell,Brisbane,Bundaberg,Central Queensland,Darling Downs,Fraser Coast,Gold Coast,Mackay,Northern,Outback,Sunshine Coast,Tropical North Queensland,Whitsundays,Adelaide,Adelaide Hills,Barossa,Clare Valley,Eyre Peninsula,Fleurieu Peninsula,Flinders Ranges and Outback,Kangaroo Island,Limestone Coast,Murraylands,Riverland,Yorke Peninsula,East Coast,Hobart and the South,"Launceston, Tamar and the North",North West,Wilderness West,Ballarat,Bendigo Loddon,Central Highlands,Central Murray,Geelong and the Bellarine,Gippsland,Goulburn,Great Ocean Road,High Country,Lakes,Macedon,Mallee,Melbourne,Melbourne East,Murray East,Peninsula,Phillip Island,Spa Country,Upper Yarra,Western Grampians,Wimmera,Australia's Coral Coast,Australia's Golden Outback,Australia's North West,Australia's South West,Experience Perth
Quarter,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2
1998Q1,551.0,195.54,261.81,455.17,424.4,674.98,329.84,1342.3,96.6,215.27,140.99,1257.45,2288.96,356.5,20.21,18.47,87.61,18.36,16.77,10.09,9.95,1052.19,122.91,191.59,218.61,173.96,827.05,128.94,226.11,76.27,742.6,220.92,60.23,658.55,9.8,46.8,34.75,92.2,212.69,112.1,34.68,193.6,90.08,89.51,160.68,148.86,380.6,247.6,141.24,63.34,182.24,231.69,88.91,196.3,255.44,382.61,153.4,739.35,320.5,211.84,24.79,182.08,1578.55,86.85,22.32,694.72,364.04,86.2,102.79,87.0,18.8,132.52,161.73,120.78,474.86,751.21
1998Q2,416.03,200.41,238.6,343.48,557.01,791.89,328.37,1161.11,120.65,336.09,166.59,795.07,1814.46,312.29,56.36,7.51,116.86,52.85,40.81,34.85,4.71,950.41,110.06,288.45,352.63,129.74,680.77,131.95,159.48,195.24,609.88,253.1,106.19,449.85,26.07,49.43,55.58,102.16,185.66,122.2,9.06,134.17,57.38,98.75,104.32,106.22,226.91,172.85,144.7,42.61,137.57,221.58,105.16,198.16,191.67,267.98,177.73,357.59,355.47,245.95,54.57,186.33,1506.07,58.93,31.0,246.63,166.24,74.36,74.86,84.94,52.48,172.62,164.97,158.4,411.62,668.71
1998Q3,436.03,253.36,184.42,303.67,555.53,566.04,324.86,839.37,110.07,228.07,372.82,643.28,1989.73,376.72,110.92,43.57,179.9,49.46,81.97,42.98,19.64,1002.77,129.55,288.23,384.86,182.5,839.02,154.91,228.86,262.62,615.31,423.51,81.77,592.9,26.49,29.74,30.07,60.35,85.62,103.43,10.46,81.74,59.49,64.03,69.0,40.46,176.9,90.6,75.05,18.85,117.64,128.05,98.76,112.97,217.11,257.24,157.5,342.32,356.66,198.0,21.61,187.35,1522.21,66.82,30.27,183.43,83.03,60.78,59.47,79.97,35.66,173.9,206.88,184.62,360.04,662.85
1998Q4,449.8,245.57,229.09,331.82,590.16,591.57,279.8,1183.86,185.47,201.12,126.43,829.91,2150.91,336.37,40.87,29.36,88.44,26.96,15.35,32.72,14.0,992.12,116.56,268.49,350.16,135.9,819.86,163.5,148.32,134.19,684.43,283.69,105.6,524.24,27.26,78.19,41.58,88.15,138.78,132.84,8.62,107.71,105.04,96.82,103.34,98.65,227.71,186.68,117.11,50.45,136.07,186.65,144.41,220.75,198.47,227.19,148.59,441.39,233.87,142.12,47.61,204.83,1511.18,59.96,32.34,299.02,215.68,46.01,35.24,116.24,27.2,207.0,198.51,138.88,462.62,832.71
1999Q1,378.57,290.48,252.28,466.82,480.13,785.1,327.49,1306.88,78.36,200.82,182.66,1111.04,1779.29,323.42,48.37,6.34,68.61,13.63,25.31,14.82,7.81,1049.75,119.29,219.53,300.72,160.64,986.89,109.72,157.25,80.52,842.17,194.51,111.5,548.39,13.77,35.28,29.64,104.95,222.12,99.71,12.56,163.61,69.94,94.54,146.66,128.81,355.15,215.85,165.71,59.89,156.46,177.44,102.31,210.96,349.27,322.14,99.59,604.52,286.86,152.45,52.13,153.67,1449.12,44.58,29.15,564.87,263.71,65.3,67.82,101.77,50.22,198.86,140.21,103.34,562.97,830.31
1999Q2,558.18,294.8,212.37,312.84,459.64,682.25,313.85,928.05,147.01,246.82,163.29,841.07,2197.58,254.48,68.02,23.53,106.13,38.32,63.16,45.54,21.39,1276.35,163.41,327.77,423.14,119.32,751.04,215.25,269.36,184.44,624.43,367.04,102.95,568.7,27.02,51.43,50.4,99.76,206.29,128.54,22.31,147.85,99.11,99.61,135.1,90.06,251.55,147.15,94.3,37.72,150.65,162.02,107.34,145.0,204.21,246.4,111.63,407.97,358.12,134.76,40.34,200.53,1441.97,96.4,25.87,247.37,189.26,81.88,104.59,82.15,23.25,252.75,138.18,223.92,467.96,754.15
1999Q3,448.9,247.57,229.11,279.45,469.37,655.29,388.82,1028.05,183.56,204.44,278.78,585.4,1968.95,204.89,76.69,20.28,188.74,71.9,79.39,45.83,18.61,1162.62,135.78,310.48,512.13,169.92,822.18,216.6,332.9,185.41,641.56,388.55,139.9,538.05,12.06,30.45,45.74,86.7,153.72,133.52,5.17,66.69,53.9,84.2,72.75,41.11,208.7,109.72,59.89,10.8,115.6,168.09,104.57,204.13,134.19,183.18,114.75,316.42,255.26,98.11,74.74,187.07,1267.43,67.58,16.47,144.74,119.64,30.12,77.46,66.95,37.1,201.03,196.49,245.22,392.78,689.85
1999Q4,594.83,241.55,254.27,363.1,564.14,630.84,301.48,1066.86,121.86,256.41,122.63,791.74,2101.37,319.53,65.81,10.04,78.05,19.04,35.26,30.49,9.74,929.77,129.41,242.0,480.44,96.64,913.54,121.4,202.79,128.85,639.12,338.66,127.21,562.43,15.66,50.62,32.57,94.71,173.39,108.79,12.35,90.65,83.36,73.54,88.86,71.7,219.04,160.42,96.18,44.41,131.43,195.44,129.34,174.73,158.08,209.79,97.89,358.82,286.06,160.13,40.2,171.67,1375.86,74.18,35.94,247.71,131.48,45.69,33.18,102.48,41.33,178.71,178.64,75.49,478.52,607.58
2000Q1,599.67,196.97,225.96,371.46,413.6,727.26,337.36,1277.58,86.78,203.01,160.97,1128.27,1893.48,273.29,32.38,11.59,68.07,25.08,37.02,16.71,15.2,1147.9,88.05,225.46,312.72,131.03,871.34,126.12,192.15,147.16,809.72,290.25,71.32,646.36,17.2,56.76,29.71,100.79,194.6,143.37,39.39,191.07,131.28,105.57,176.77,123.83,296.27,186.64,128.19,54.2,141.03,344.37,85.78,265.16,292.95,283.01,127.27,684.42,232.11,237.47,42.03,194.01,1612.51,68.27,28.13,424.6,246.43,80.69,81.05,67.13,28.41,177.0,159.63,60.57,482.17,756.14
2000Q2,557.14,233.54,321.48,335.44,467.09,627.62,309.31,854.15,97.62,188.83,171.08,744.46,1857.92,236.69,39.77,32.64,141.57,44.36,64.78,26.87,9.78,1033.56,142.96,347.74,371.13,101.57,780.26,89.0,189.49,193.42,682.08,321.91,91.04,562.76,18.37,40.72,36.26,121.06,118.63,95.58,19.13,152.4,63.82,79.25,107.51,66.68,200.11,130.88,72.31,39.09,169.71,242.59,122.62,144.73,217.19,210.78,70.77,340.95,370.72,161.14,21.98,170.47,1502.0,70.69,18.99,269.74,163.15,79.72,60.28,66.36,27.34,161.52,175.43,99.85,553.6,817.93


State,ACT,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NT,NT,NT,NT,NT,NT,NT,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,SA,SA,SA,SA,SA,SA,SA,SA,SA,SA,SA,SA,TAS,TAS,TAS,TAS,TAS,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,WA,WA,WA,WA,WA
Region,Canberra,Blue Mountains,Capital Country,Central Coast,Central NSW,Hunter,New England North West,North Coast NSW,Outback NSW,Riverina,Snowy Mountains,South Coast,Sydney,The Murray,Alice Springs,Barkly,Darwin,Kakadu Arnhem,Katherine Daly,Lasseter,MacDonnell,Brisbane,Bundaberg,Central Queensland,Darling Downs,Fraser Coast,Gold Coast,Mackay,Northern,Outback,Sunshine Coast,Tropical North Queensland,Whitsundays,Adelaide,Adelaide Hills,Barossa,Clare Valley,Eyre Peninsula,Fleurieu Peninsula,Flinders Ranges and Outback,Kangaroo Island,Limestone Coast,Murraylands,Riverland,Yorke Peninsula,East Coast,Hobart and the South,"Launceston, Tamar and the North",North West,Wilderness West,Ballarat,Bendigo Loddon,Central Highlands,Central Murray,Geelong and the Bellarine,Gippsland,Goulburn,Great Ocean Road,High Country,Lakes,Macedon,Mallee,Melbourne,Melbourne East,Murray East,Peninsula,Phillip Island,Spa Country,Upper Yarra,Western Grampians,Wimmera,Australia's Coral Coast,Australia's Golden Outback,Australia's North West,Australia's South West,Experience Perth
Quarter,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2
2012Q1,446.9,184.05,246.43,359.68,423.28,703.7,337.47,1140.21,39.0,183.6,134.11,1038.1,2050.56,188.97,38.7,8.31,82.9,20.89,31.02,7.18,11.07,1276.03,84.57,291.14,478.8,157.55,832.24,127.19,169.84,111.07,758.03,278.61,93.07,532.22,14.91,48.46,38.21,88.42,244.12,105.63,36.21,132.6,73.71,51.83,149.43,130.31,346.95,198.72,148.59,45.23,178.63,208.34,68.32,190.75,349.63,295.23,102.74,599.82,267.17,172.94,65.77,164.29,1685.91,77.29,46.29,477.95,315.51,78.72,60.24,55.77,19.53,168.19,137.12,81.14,601.06,735.5
2012Q2,456.8,259.42,294.35,248.54,526.63,601.29,329.13,923.79,80.28,217.94,126.22,755.47,1934.29,226.84,43.22,12.11,128.03,38.25,48.96,19.7,13.61,1150.03,159.92,387.81,400.88,151.54,816.95,211.86,242.75,149.63,648.88,342.86,119.67,533.66,41.64,51.64,48.86,72.91,146.35,117.58,24.74,110.85,51.66,63.74,96.62,54.28,223.66,132.81,99.8,25.94,149.23,265.68,88.32,160.22,182.77,204.34,142.11,356.08,286.33,129.82,57.03,132.89,1757.2,112.85,17.05,259.29,147.4,79.3,68.14,59.76,34.62,187.22,172.0,190.01,413.71,656.67
2012Q3,521.84,190.13,216.09,309.32,394.86,656.98,340.88,971.24,71.9,176.51,300.97,506.48,1923.7,202.57,65.68,17.31,185.47,80.32,58.36,40.96,14.72,1294.26,145.62,395.35,422.91,165.78,874.53,129.33,248.94,233.08,627.0,434.94,129.07,539.97,17.02,26.59,44.28,101.74,97.03,114.9,11.82,83.91,41.65,50.81,70.09,29.91,261.73,127.39,52.59,6.5,142.18,123.68,75.56,121.78,151.98,164.31,94.25,245.99,309.98,116.17,31.0,164.34,1762.61,104.27,23.32,153.72,126.09,57.19,48.54,60.62,32.75,165.54,188.78,184.05,320.77,651.6
2012Q4,511.77,131.05,287.32,340.72,505.68,795.41,350.31,1199.53,62.81,191.47,127.88,764.52,1919.27,227.13,25.67,11.93,102.56,23.95,53.22,15.67,15.86,1268.18,150.84,281.89,348.39,141.66,999.78,252.12,195.8,143.43,763.39,436.23,125.71,610.79,33.82,61.01,22.77,88.9,144.82,127.69,34.67,101.77,69.04,64.42,143.65,61.68,307.32,173.03,86.91,33.6,138.69,233.33,85.62,172.56,188.21,220.33,115.1,351.87,271.67,180.19,63.32,156.59,1848.41,97.0,26.88,240.04,169.61,55.47,72.05,89.81,43.15,152.11,184.36,137.45,547.81,778.89
2013Q1,524.55,193.22,304.09,377.22,363.2,711.3,314.91,1234.21,45.97,182.31,160.73,1057.57,1797.54,228.71,18.97,6.8,67.34,17.97,5.3,10.2,6.83,1400.08,103.87,294.51,391.88,97.32,968.31,153.66,191.81,80.62,692.1,324.43,80.54,503.19,44.28,34.61,41.59,49.38,272.45,67.38,10.94,106.18,111.65,74.21,154.37,125.92,384.28,254.92,144.78,53.13,128.04,176.27,86.56,173.27,311.39,325.46,124.25,592.4,253.45,243.83,31.72,159.88,1634.71,113.61,19.99,512.9,246.13,39.67,71.58,59.29,24.55,171.14,156.88,85.53,583.72,816.76
2013Q2,475.53,181.79,205.41,346.69,560.4,680.03,268.22,936.41,104.12,271.84,129.25,696.38,2105.89,267.39,40.75,12.95,126.61,31.77,42.87,50.33,19.01,1156.67,127.86,288.22,404.76,125.88,831.2,184.46,222.64,162.17,589.36,442.45,124.91,494.49,46.98,69.1,28.47,81.68,147.32,140.41,33.88,131.71,58.6,95.03,130.96,56.96,287.75,169.68,104.54,38.59,121.35,210.52,94.1,189.07,195.09,237.71,134.43,349.78,295.94,132.86,47.88,176.11,1816.21,106.9,9.47,209.41,188.96,68.33,48.17,74.26,36.5,168.13,193.99,161.47,538.97,836.01
2013Q3,506.51,197.82,248.79,258.91,404.99,697.92,299.03,1011.66,106.65,205.55,294.91,678.77,2181.61,175.48,61.05,19.73,176.33,52.67,47.81,38.28,24.81,1233.68,152.27,254.86,479.96,154.81,837.35,174.02,337.49,144.85,588.54,475.97,155.42,587.56,23.58,37.61,42.84,73.55,115.34,105.95,16.88,64.15,49.01,53.38,104.15,53.81,182.97,105.77,89.04,16.67,95.2,186.78,71.39,164.62,171.85,155.3,96.48,295.69,233.47,112.34,45.89,137.1,1839.93,117.78,16.15,233.28,117.35,66.17,22.0,58.31,15.17,180.19,166.2,227.23,419.94,750.48
2013Q4,529.58,161.39,301.83,319.05,441.18,659.91,288.01,1114.82,72.21,195.86,111.12,823.92,2262.1,286.37,36.06,8.86,81.4,36.83,37.31,17.38,14.43,1392.67,124.57,301.56,268.82,133.11,870.19,126.65,215.23,161.38,741.1,361.82,139.59,542.75,36.45,68.35,22.99,150.11,145.83,80.33,20.06,131.61,58.17,91.94,96.25,75.73,238.52,113.41,87.73,33.61,203.28,201.64,91.91,168.4,246.84,277.34,97.15,457.53,264.2,156.13,76.81,152.79,1790.87,118.08,41.01,341.56,185.34,72.47,31.63,101.53,48.05,132.36,131.35,117.51,437.58,764.84
2014Q1,540.61,179.77,267.64,414.37,339.37,801.25,344.79,1301.43,47.14,217.15,138.31,1027.84,2025.43,216.93,33.33,10.59,113.69,27.48,34.77,8.19,8.41,1321.64,93.42,271.6,351.57,150.36,996.04,148.3,192.39,102.32,830.38,381.48,71.44,628.46,30.17,44.06,28.67,66.48,239.4,107.35,31.84,145.12,95.97,68.62,144.64,119.18,364.93,217.46,131.82,63.67,131.33,195.17,88.45,195.84,353.92,350.02,143.29,622.0,324.91,216.4,56.24,134.85,1865.46,150.34,42.76,502.87,326.74,108.13,71.02,50.45,21.11,259.22,257.69,250.49,945.59,1077.17
2014Q2,586.83,264.54,242.88,296.2,472.07,692.44,291.22,1174.74,99.99,217.3,151.71,765.71,1976.86,240.09,83.35,5.13,196.14,82.81,105.73,83.8,14.04,1368.2,121.11,410.59,426.44,125.36,843.56,191.91,294.91,187.71,846.78,482.44,141.68,653.06,46.6,35.89,21.49,79.31,171.1,154.23,19.67,96.14,60.67,78.35,117.15,121.17,369.86,186.0,156.1,44.44,123.07,237.96,97.34,188.36,259.52,220.8,107.81,442.94,311.13,159.35,80.48,164.2,2060.16,153.38,47.46,255.98,164.11,71.95,45.4,88.98,30.1,245.33,213.51,278.03,761.18,1085.94


State,ACT,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NSW,NT,NT,NT,NT,NT,NT,NT,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,QLD,SA,SA,SA,SA,SA,SA,SA,SA,SA,SA,SA,SA,TAS,TAS,TAS,TAS,TAS,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,VIC,WA,WA,WA,WA,WA
Region,Canberra,Blue Mountains,Capital Country,Central Coast,Central NSW,Hunter,New England North West,North Coast NSW,Outback NSW,Riverina,Snowy Mountains,South Coast,Sydney,The Murray,Alice Springs,Barkly,Darwin,Kakadu Arnhem,Katherine Daly,Lasseter,MacDonnell,Brisbane,Bundaberg,Central Queensland,Darling Downs,Fraser Coast,Gold Coast,Mackay,Northern,Outback,Sunshine Coast,Tropical North Queensland,Whitsundays,Adelaide,Adelaide Hills,Barossa,Clare Valley,Eyre Peninsula,Fleurieu Peninsula,Flinders Ranges and Outback,Kangaroo Island,Limestone Coast,Murraylands,Riverland,Yorke Peninsula,East Coast,Hobart and the South,"Launceston, Tamar and the North",North West,Wilderness West,Ballarat,Bendigo Loddon,Central Highlands,Central Murray,Geelong and the Bellarine,Gippsland,Goulburn,Great Ocean Road,High Country,Lakes,Macedon,Mallee,Melbourne,Melbourne East,Murray East,Peninsula,Phillip Island,Spa Country,Upper Yarra,Western Grampians,Wimmera,Australia's Coral Coast,Australia's Golden Outback,Australia's North West,Australia's South West,Experience Perth
Quarter,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2
2016Q1,625.14,227.46,306.44,398.09,508.69,842.8,321.22,1560.82,65.68,238.92,136.59,1210.63,2199.58,270.96,26.56,23.36,145.05,51.87,67.19,17.58,20.48,1327.24,91.84,282.91,361.78,143.96,933.09,168.01,274.76,126.64,840.3,403.73,87.9,695.25,70.57,49.95,39.58,112.57,223.19,116.22,30.62,160.43,111.2,90.17,171.73,172.56,406.4,199.37,174.14,58.58,119.6,235.28,131.52,200.69,424.55,358.45,144.64,691.59,422.87,253.19,57.83,145.89,2061.23,171.93,39.6,544.92,327.29,91.94,38.1,95.97,42.63,284.55,278.31,209.73,982.07,1116.51
2016Q2,592.61,303.56,322.5,250.96,461.04,829.98,392.02,1153.7,134.83,279.95,98.22,775.57,2389.05,269.21,76.55,14.11,168.07,72.38,68.62,61.4,8.0,1434.34,133.99,338.91,525.31,168.44,854.11,203.96,283.23,191.32,786.75,419.56,106.64,664.61,13.93,37.23,38.26,93.25,157.54,189.93,21.25,138.73,91.46,97.26,116.67,51.16,280.49,145.79,135.92,38.05,183.54,242.14,90.69,125.13,236.83,202.79,97.62,380.9,332.41,138.84,85.21,222.25,2125.7,147.91,33.23,301.58,168.65,82.17,63.7,49.53,24.4,279.91,288.98,289.81,650.12,960.57
2016Q3,572.44,211.02,262.93,265.45,505.24,801.06,350.37,1038.75,141.24,254.54,398.11,653.47,2217.42,226.7,120.97,36.34,232.36,113.06,92.34,89.19,29.57,1496.62,169.67,447.11,455.37,158.09,850.26,223.96,325.35,209.93,843.53,593.58,165.87,635.57,39.47,48.43,55.98,87.79,123.51,164.08,13.4,109.1,52.25,78.17,126.49,43.89,270.19,154.8,75.42,21.97,148.2,263.94,71.84,183.01,254.13,173.06,110.71,341.24,412.96,92.84,99.86,143.67,2205.61,110.27,36.87,204.02,145.23,77.7,52.05,70.25,24.44,254.05,271.14,424.99,531.24,835.63
2016Q4,667.21,220.17,277.33,379.48,525.25,843.79,346.69,1407.18,105.95,230.67,143.47,923.95,2369.82,221.75,40.29,16.43,139.79,67.05,50.46,21.36,5.3,1745.86,139.27,382.08,464.81,117.11,1065.63,254.39,268.28,175.44,926.13,479.35,88.04,634.37,52.39,55.64,37.89,95.54,229.6,164.23,45.93,106.12,58.46,71.38,83.93,106.14,330.99,216.4,148.59,30.87,157.29,279.35,96.42,223.31,411.78,271.84,142.2,497.8,295.05,180.17,70.31,174.84,2250.0,196.48,50.22,323.61,189.96,114.62,59.65,102.37,25.71,256.81,269.93,316.21,755.09,1058.3
2017Q1,634.37,190.47,300.02,455.06,529.94,757.03,358.09,1409.74,91.14,182.64,159.37,1189.25,2426.3,271.65,48.0,17.94,109.65,39.45,49.74,21.93,11.46,1550.14,112.6,231.0,380.59,148.92,1139.68,179.84,215.29,183.94,891.2,305.32,113.46,708.33,36.97,41.42,33.64,101.49,242.64,128.09,24.19,193.16,73.92,81.28,150.26,170.16,480.55,260.34,164.81,59.45,199.28,247.28,139.8,191.46,436.91,494.21,200.18,746.11,436.2,247.79,74.59,180.9,2161.49,187.25,66.46,561.77,339.9,87.92,114.14,92.96,62.93,241.95,225.97,226.55,919.87,956.57
2017Q2,748.29,315.79,305.71,278.48,651.36,897.96,405.91,1164.06,107.54,293.57,162.18,938.97,2446.16,317.35,85.13,44.52,223.19,65.85,104.1,65.68,32.98,1601.92,115.92,354.36,517.56,144.29,903.63,215.53,268.57,188.04,764.43,415.73,148.48,676.45,55.76,73.85,43.3,102.92,124.85,146.64,15.86,131.6,49.53,113.46,125.93,74.97,386.81,191.33,138.45,28.81,209.91,239.04,123.6,165.51,298.92,297.93,166.95,469.99,370.53,195.4,79.16,192.05,2100.86,175.59,31.12,324.78,196.48,56.13,93.99,71.6,41.84,223.86,256.35,285.79,764.15,908.34
2017Q3,631.76,314.63,287.25,272.44,628.6,914.82,398.97,1145.65,162.59,213.9,448.99,735.3,2515.94,259.18,80.81,21.26,226.11,81.87,88.53,62.57,36.51,1786.03,189.75,454.74,573.97,172.35,1052.64,247.06,269.67,265.01,834.9,570.96,116.75,674.14,37.12,39.65,30.68,62.21,98.14,208.61,16.68,99.18,66.3,81.25,100.82,56.08,214.08,167.77,151.64,28.52,116.66,209.23,102.93,120.16,295.32,300.69,150.43,456.17,494.43,118.22,50.67,203.49,2319.36,139.01,46.97,278.82,156.06,100.12,39.8,56.85,62.6,282.36,306.15,352.11,547.06,1006.27
2017Q4,720.33,203.46,356.38,427.77,628.23,1004.33,404.19,1329.11,106.31,218.14,149.81,904.13,2537.2,273.43,52.25,7.47,150.25,43.72,40.27,33.42,18.68,1540.97,152.71,372.65,422.32,202.93,907.62,163.86,296.73,185.72,886.37,522.65,159.38,723.71,45.96,48.85,47.73,89.37,225.47,135.57,42.61,141.62,74.85,133.77,159.6,124.16,332.99,173.79,120.89,48.69,206.1,270.1,137.74,257.19,320.02,314.4,184.95,513.79,362.94,189.58,82.85,163.58,2632.95,151.54,49.18,454.86,238.13,95.35,75.1,119.91,45.15,274.79,223.74,288.69,745.97,1102.56


### Q1.

Given that:

- Our goal is to create an 8-step forecasting model.


- We have allocated 16 observations for the validation set. 

How many train-val splits will we end up with under TSCV?

In [15]:
a = np.zeros((9, 16))
for i, arr in enumerate(a):
    arr[0:i+8] = 1
    arr[:i] = -1
df_0 = pd.DataFrame(a.astype(int))
df_0.columns = range(-16, 0)
df_0.index = range(1, 10)
df_0.style.background_gradient(cmap='coolwarm', axis=None)

Unnamed: 0,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1
1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0
2,-1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0
3,-1,-1,1,1,1,1,1,1,1,1,0,0,0,0,0,0
4,-1,-1,-1,1,1,1,1,1,1,1,1,0,0,0,0,0
5,-1,-1,-1,-1,1,1,1,1,1,1,1,1,0,0,0,0
6,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,0,0,0
7,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,0,0
8,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,0
9,-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1


### Answer.

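`9 train-val splits`

With 16 validation observations and an 8-step horizon, the validation window can start at 16 − 8 + 1 = 9 different positions, one for each row of the table above.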

### Q2.

- Extract the `Canberra` time series.


- Using TSCV on the validation set, calculate the average and standard deviation of the `RMSE` statistic for an "out-of-the-box" `LightGBM` model (i.e. default parameters) that uses a `direct forecasting` approach.


- Set the `random_state` parameter of the `LGBMRegressor()` to 1.


- Search the range of `lookback windows` from <b><u>4 to 16</u></b> and find the optimal lookback that minimizes the sum of the average and stdev. `RMSE` values.


- Place the statistics in a dataframe and print it.

<i>HINT: I suggest you try to implement this from scratch. If you copy paste code without understanding what you're doing, you will most probably get this wrong. It is critical that you get this question right so that bugs/errors will not cascade further down.</i>

In [98]:
from datetime import datetime

# Labels stored alongside the results.
model = 'MOR-LGB'
component = 'trend'
# `col` has to be defined before it is used to select the series.
col = ('ACT', 'Canberra')
ts = df_design[col]
name = 'AAA'
path = r'C:/Users/aamorado/Moog Inc/The Journey towards a Smart Connected Factory - MOOG - AIM/PM/mazak1011/time_series_simulation'
# Timestamp that becomes part of the output file name.
dt = datetime.now().strftime("%d-%m-%y %H-%M-%S")
h = 8

# Direct forecasting: one LightGBM regressor per forecast step.
est = MultiOutputRegressor(lgb.LGBMRegressor(random_state=1), n_jobs=-1)
param_grid = {
    'w': range(4, 17),
    'h': [h]
}
tscv = TimeSeriesSplit(val_size=16)
gs = GridSearchCV(est, param_grid, tscv, {'rmse': rmse})

# With the default scores=False the search stores per-fold forecasts.
gs.fit(ts)

100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


In [99]:
import pickle

# Tag every grid-search row with the model/component labels and stringify the
# params column before it becomes an index level.
obj = gs.df.assign(
    model=model,
    component=component,
    params=lambda frame: frame.params.astype(str),
)
obj = obj.set_index(['component', 'model', 'params'])
display(obj)

# Persist the indexed results under a timestamped file name.
with open(f'{path}/{name} {dt}.pickle', 'wb') as handle:
    pickle.dump(obj, handle)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,3,4,5,6,7,8
component,model,params,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
trend,MOR-LGB,"{'w': 4, 'h': 8}","[521.7200607630713, 488.24357110322256, 498.4733511829438, 499.49545948483814, 499.403377003239, 452.7435634974129, 501.15914153158366, 472.01830199364855]","[495.13693516824213, 511.31372771216144, 498.81116822159385, 510.0526589530193, 463.3260332529855, 468.8441996096802, 473.9665092217352, 472.35300351241455]","[496.50150976772574, 516.0177578829678, 505.1429149069274, 463.3996223977815, 481.0424125432536, 493.58968425242784, 483.70531802476086, 476.60785159775884]","[507.30318869715757, 504.27916604410365, 458.89796273606987, 468.31484209014474, 503.04977187512554, 496.32032003909126, 457.5074262948738, 503.1709220843486]","[508.6261972726218, 481.1511462773579, 461.6840226997417, 480.8798919517343, 507.78324801577224, 487.98529799011806, 479.3923049197479, 484.4777155500416]","[477.90979452193443, 473.4830970413751, 463.0439034499775, 508.3265158397072, 489.2515051894067, 502.76768941311303, 475.3908323913706, 492.4320607107298]","[467.4421967842828, 451.4887710918659, 499.68411231367395, 485.57023819470936, 503.1076384055485, 480.2226994851062, 486.76691447257656, 490.87501236275415]","[449.334666310798, 488.1455050968606, 480.9927641446864, 489.97489553149387, 468.61104785954916, 495.4259081374457, 488.5582220498318, 501.57450301343374]","[454.41920888532894, 465.2535232763821, 478.54216561062515, 463.39219003767505, 501.273246526125, 507.7642787358966, 479.129587957408, 476.4969084864198]"
trend,MOR-LGB,"{'w': 5, 'h': 8}","[533.196462258115, 478.37158535477386, 510.94483533654625, 475.8105499531224, 498.02902072437547, 448.12214957453534, 508.84188055797813, 485.3194656999158]","[511.4972067685701, 499.7275656057032, 517.8860296597094, 494.93175246088293, 443.34111568512265, 470.2672332351938, 486.1773176891719, 495.6886036761936]","[518.7261862034223, 502.5264136055139, 526.0594869192006, 463.0914482415448, 464.45275496317305, 447.8060316710195, 484.36918022913585, 478.35634750172966]","[501.59776502207285, 508.37502060145306, 453.92326326847467, 480.10786309083187, 495.09803244160344, 500.5068946856738, 469.34439516942257, 489.48040534362303]","[492.71149967268485, 462.6278096595511, 449.6372062352794, 502.0351041091716, 502.25782451422555, 481.28860182408846, 494.34818112649293, 471.1761135512496]","[471.56907116495825, 461.6840226997417, 463.3622790694078, 506.5172034022836, 496.48906016143434, 503.49401670084086, 473.93204858101967, 503.73589461796365]","[483.96251064810207, 448.03751627931064, 504.52781493533956, 488.5537817904812, 493.9110088482926, 470.66464232600896, 496.075802772249, 495.22540039211276]","[440.1821406272423, 503.64401961751196, 460.4174152443773, 504.4464805427864, 485.98912773975394, 515.6852606287614, 475.40126417342947, 480.54932892119837]","[463.4858258759757, 480.87431520950634, 463.55618637178503, 472.55932978527017, 502.76426471376686, 515.7544909923625, 483.5411985643868, 465.4821420283692]"
trend,MOR-LGB,"{'w': 6, 'h': 8}","[528.2062489006604, 489.69437621162535, 501.5612763520464, 467.24768292407674, 478.0741463792554, 454.88120162557885, 522.1957131915813, 494.2726088319394]","[496.6446976460287, 517.0871170090712, 498.67115469619057, 489.9310685431094, 429.2362041584299, 478.961968853472, 498.5950293202243, 502.22063892299656]","[520.4558251760325, 520.1811633815878, 518.9154580392662, 437.690629199548, 452.7875580336247, 456.2781262094687, 508.06295842904734, 494.27759287549566]","[489.07969746433537, 525.3845870981492, 452.74109497352134, 473.57236695459534, 454.61656954325434, 500.27467765618843, 475.27696063637666, 504.40195936442024]","[498.91248020205666, 458.6397817226491, 463.0774247722113, 499.20798741029836, 512.5654200532158, 490.2155846125816, 473.3953395092687, 471.84618540179025]","[453.24801375223524, 445.20255196660435, 483.3970413766871, 507.12645583724446, 496.56705352482373, 515.1530101088211, 457.3555329855127, 502.1591466001947]","[470.8780996115044, 461.87138763214233, 512.6536626213357, 493.9547732542232, 500.80250350231364, 459.9803231427149, 502.05379748208003, 503.2564146083763]","[438.07335763181175, 504.52781493533956, 467.9622255363573, 499.7270601884228, 477.4375579976651, 508.47791612332134, 481.0459900579837, 474.0899126283366]","[481.45139208641456, 467.53074533906715, 478.13327618825986, 500.4525015353037, 535.2285712496522, 517.174890254609, 455.0102897834011, 451.8001149804189]"
trend,MOR-LGB,"{'w': 7, 'h': 8}","[505.5139479817778, 499.5974921321233, 480.83183535536136, 475.92478748159954, 488.8114997451794, 445.962007517106, 518.9099448001282, 482.73930187599643]","[499.3927950309144, 516.8507094161363, 495.8241370289659, 475.20537885229027, 439.5946860956242, 489.8304219644521, 499.64694490437915, 496.4760921173173]","[526.4173502615905, 507.0272323789939, 509.45043231074396, 428.61970747666044, 465.87411183355243, 470.6149612936378, 510.641593518837, 483.6420722735932]","[510.46849677613403, 516.4096253521035, 422.6162158569869, 455.5452619027052, 466.09437617729634, 526.7300310133276, 486.494853795342, 470.5320936191531]","[516.5315402938883, 448.51780212941566, 449.55987998351225, 457.5488660442527, 520.9129335193943, 504.320571829423, 483.18169390742605, 471.94238911086376]","[450.7401241386628, 456.11141198427106, 478.93156411559175, 514.8607792688334, 501.0967914704682, 495.6757166611538, 466.71597755044934, 501.1292341417601]","[450.90301544746393, 477.0236767797114, 512.4039036518069, 496.33624820312167, 513.5621560072558, 465.42523719694975, 496.4066245194531, 499.5676846483081]","[455.41166887124973, 509.16475849359097, 473.60430183997113, 504.82167102081524, 475.91597309383644, 527.5886287665095, 485.21167704196006, 488.5563017940205]","[489.9418798813168, 466.282881218937, 468.27877092759843, 477.4375579976651, 532.8571516448957, 527.659970186709, 454.5072477622081, 457.6440455646993]"
trend,MOR-LGB,"{'w': 8, 'h': 8}","[490.7295348900334, 478.6797595685413, 470.93187925505276, 480.36573146216176, 478.72983249085956, 445.4842172495295, 491.48531917107755, 473.87760837914504]","[500.4468552514202, 485.1335499019139, 494.80891997798005, 488.8114997451794, 437.77409230458244, 490.0247114707848, 487.9264616031375, 489.44091245689395]","[520.8591287144192, 499.47649825183044, 495.7098596057946, 438.9718557312677, 474.4201577450265, 480.4639866160394, 506.50609120489463, 469.4679826408674]","[505.2014881357127, 509.45043231074396, 418.1086620806408, 465.96696605302884, 476.2457542760211, 525.7781007742943, 483.6420722735932, 460.5071513927638]","[516.4096253521035, 422.6162158569869, 446.1226642454361, 467.05558388934764, 533.7072169134777, 512.5606897409117, 467.9207370511009, 455.8881596394408]","[447.804790422654, 448.83251936551375, 451.52499868079383, 521.002028562615, 510.1057173694897, 499.4830734098291, 471.50870084245025, 485.8365525065309]","[456.11141198427106, 478.93156411559175, 518.5233723464796, 501.0967914704682, 491.7070878860487, 481.28855749441226, 501.7045410922473, 499.2230449562539]","[477.0236767797114, 512.4039036518069, 481.34802294666616, 516.1446639186751, 477.6719940146487, 515.2700078817469, 496.1420981899375, 499.67792025410506]","[509.16475849359097, 473.60430183997113, 483.42509165442925, 475.91597309383644, 535.435069046896, 526.9031201033605, 480.06463487051593, 453.5740102306119]"
trend,MOR-LGB,"{'w': 9, 'h': 8}","[487.89156875610354, 487.83115310668944, 487.92638931274416, 487.1996269226074, 486.9581748962402, 482.6873329162598, 481.3963310241699, 479.9619758605957]","[481.2553243180872, 477.4760099403345, 493.81656501502073, 478.03072307952596, 437.6068275426402, 470.61062696524294, 483.73529476976626, 484.2907708698853]","[494.6031001007983, 497.36173489739264, 508.21336550607236, 436.3157821138258, 473.771976634984, 467.40874021797464, 506.66715324522863, 466.44282798415867]","[492.6937094065717, 493.4902357046059, 429.8302277453181, 475.72236810315786, 484.41291464534316, 516.4990953984192, 467.06260241336963, 463.0708113666096]","[504.359307799844, 418.1086620806408, 451.78451786100163, 477.2696394638501, 531.1901143330533, 502.93526958680906, 453.92682691685343, 450.0162004104679]","[420.38751127908534, 445.4733292087142, 456.90031331286207, 533.9535769764163, 514.9939387409324, 479.19960034275573, 451.32915288060167, 484.75980239679575]","[453.06680075075656, 445.2195636866239, 523.6371517087166, 510.90136620390194, 494.726241047595, 481.0382570350755, 477.5522710957737, 488.81841490624674]","[473.1816499391836, 517.0481132629196, 489.11930740720703, 493.8420661715142, 488.8114410099951, 526.0978325128602, 487.8469924989112, 487.09973172283105]","[506.10592602175893, 478.9896145722097, 496.2923303650375, 479.9974853617776, 524.1893723418036, 531.0707404068006, 487.22791387616525, 448.88799431326214]"
trend,MOR-LGB,"{'w': 10, 'h': 8}","[486.11609434470154, 489.0572235889924, 487.7609268579728, 489.6776044796675, 483.58834056365185, 481.0374982784956, 480.0751475798778, 480.76863372020233]","[487.83115310668944, 487.92638931274416, 487.1996269226074, 486.9581748962402, 482.6873329162598, 481.3963310241699, 479.9619758605957, 479.9220314025879]","[485.91504192517687, 496.23769460117876, 497.1516357795571, 434.0580873801295, 459.4941258271462, 475.2876777134763, 502.3033520383598, 471.3779541576382]","[486.8595243778258, 507.77590021924993, 422.2556800307441, 477.393330524325, 471.5714818431775, 519.1377915861616, 463.43278126036336, 472.0425597551323]","[487.7975202945584, 427.9237712121664, 465.71826375835786, 484.41291464534316, 518.1695683516224, 491.62056962741576, 458.3964732392473, 465.3662524763149]","[414.07567402793535, 451.44698600181255, 467.70467777004137, 532.8058807752778, 510.126447527283, 471.0061254004634, 445.40164064507553, 491.099903973907]","[449.307794393162, 452.91068103852285, 535.8462682735613, 522.9796913648644, 478.36977976593136, 463.6073000609469, 476.9055559170597, 500.4607869113454]","[437.5813546287618, 521.5049293407051, 500.90318080571114, 496.44919203073545, 484.48302457856914, 496.35822392951894, 484.04089058663754, 487.5493347708119]","[509.3383821577641, 487.7257936652459, 474.9078572630524, 499.75030422744703, 533.1384257054038, 513.4577113296405, 465.97719936010105, 453.22069120580505]"
trend,MOR-LGB,"{'w': 11, 'h': 8}","[487.32929671438114, 488.9171656558388, 490.3188854016756, 486.29065423262745, 481.9187999524568, 479.67210388183594, 480.9060114810341, 479.016526874743]","[489.0572235889924, 487.7609268579728, 489.6776044796675, 483.58834056365185, 481.0374982784956, 480.0751475798778, 480.76863372020233, 478.1931426219451]","[487.92638931274416, 487.1996269226074, 486.9581748962402, 482.6873329162598, 481.3963310241699, 479.9619758605957, 479.9220314025879, 477.65836334228516]","[478.892749317321, 497.1516357795571, 422.579375847375, 459.4941258271462, 479.6462812806028, 514.4451114924395, 469.7690824226873, 454.82480692558806]","[499.4388024306995, 422.23822005866555, 466.08540827995574, 471.5714818431775, 520.9388314971169, 487.4753972909989, 466.1161094826426, 451.12476353043945]","[425.84305534344634, 464.0321284649358, 472.30150315662013, 518.1695683516224, 492.1389338456134, 474.1981447069879, 460.0520277768489, 467.43686994451855]","[456.1725708746648, 462.73482550060487, 532.0667748465091, 510.126447527283, 470.70362600978297, 463.60710853068406, 490.51689089631293, 490.791929879213]","[441.6790293413612, 532.5415950797325, 510.52735781933944, 478.36977976593136, 464.314839183942, 494.33444293352477, 493.64215938399616, 463.8465044344941]","[513.497260658283, 495.29417132562395, 482.3306433815058, 484.48302457856914, 497.0728961175064, 506.40217654272755, 474.2604350554867, 447.5268337343173]"
trend,MOR-LGB,"{'w': 12, 'h': 8}","[487.1387527053421, 491.57550791147594, 486.8577278755807, 484.64902640677786, 480.5403219686972, 480.51453049118453, 479.1102633089633, 479.6365760597023]","[488.9171656558388, 490.3188854016756, 486.29065423262745, 481.9187999524568, 479.67210388183594, 480.9060114810341, 479.016526874743, 478.7752067164371]","[487.7609268579728, 489.6776044796675, 483.58834056365185, 481.0374982784956, 480.0751475798778, 480.76863372020233, 478.1931426219451, 478.2117904271835]","[487.1996269226074, 486.9581748962402, 482.6873329162598, 481.3963310241699, 479.9619758605957, 479.9220314025879, 477.65836334228516, 479.3024139404297]","[487.5316553185805, 422.579375847375, 456.0097932260033, 479.6462812806028, 518.3444151109886, 483.2148017616839, 454.82480692558806, 453.3885423925575]","[420.7369482363638, 464.7161122774613, 467.28313873902493, 518.6395528995299, 489.45964567207164, 481.68364677535664, 449.2304466364993, 470.1619562767111]","[464.96000142816115, 476.66138994575726, 519.5987477475377, 492.1389338456134, 474.1981447069879, 457.12039013313745, 469.56034762545073, 471.1594782543864]","[456.7929764962418, 525.6216895583256, 502.5572559112187, 470.70362600978297, 467.5047180956131, 506.5173549731783, 478.75405815131506, 468.3120466527691]","[524.1829609241047, 505.86793388973575, 467.35810488416905, 464.314839183942, 494.33444293352477, 504.53540020789563, 452.46415731321775, 445.5713742902613]"
trend,MOR-LGB,"{'w': 13, 'h': 8}","[489.8215374416775, 488.05311330159503, 485.1862513224284, 483.30809699164496, 481.43026648627387, 478.65802595350476, 479.7501398722331, 478.4634187486437]","[491.57550791147594, 486.8577278755807, 484.64902640677786, 480.5403219686972, 480.51453049118453, 479.1102633089633, 479.6365760597023, 477.61047610721073]","[490.3188854016756, 486.29065423262745, 481.9187999524568, 479.67210388183594, 480.9060114810341, 479.016526874743, 478.7752067164371, 477.06288387900906]","[489.6776044796675, 483.58834056365185, 481.0374982784956, 480.0751475798778, 480.76863372020233, 478.1931426219451, 478.2117904271835, 478.21093123998395]","[486.9581748962402, 482.6873329162598, 481.3963310241699, 479.9619758605957, 479.9220314025879, 477.65836334228516, 479.3024139404297, 479.04980926513673]","[418.0902550136383, 453.32542944432527, 474.8767388692554, 514.725799273606, 485.34449185280425, 474.22531977740533, 451.5999642646624, 463.769710355654]","[467.40064147106773, 466.2842071237929, 516.1866922313792, 489.45964567207164, 481.68364677535664, 453.0201075892185, 470.51790988828446, 464.65277524628647]","[473.0749639958184, 504.0192339457357, 482.1757018824433, 474.1981447069879, 457.12039013313745, 485.9589841175414, 467.4521324716708, 465.81580188203657]","[520.0931977085668, 490.90337037810843, 462.7260819745226, 467.5047180956131, 507.39063093019683, 487.37350481545957, 457.0176743883985, 449.74748232732424]"


In [9]:
# Show the grid-search results table without its 'Horizon' column.
gs.df.drop(columns='Horizon')

Unnamed: 0,Lookback,Average RMSE,Stdev RMSE,Sum
0,4,66.84,24.38,91.21
1,5,67.72,24.38,92.09
2,6,67.33,26.25,93.58
3,7,66.84,25.43,92.26
4,8,66.66,21.89,88.55
5,9,69.11,22.25,91.35
6,10,69.88,24.09,93.97
7,11,71.39,25.3,96.69
8,12,72.42,26.49,98.91
9,13,72.68,26.91,99.59


<div class="alert alert-block alert-info">
    The optimal lookback is the one that minimizes the sum of the average and standard deviation of the <code>RMSE</code> values.
</div>

In [8]:
# One-row table of the best configuration, indexed by the series label,
# without the 'Horizon' column.
(
    pd.DataFrame(gs.best_params, index=[col])
    .drop(columns='Horizon')
)

Unnamed: 0,Lookback,Average RMSE,Stdev RMSE,Sum
"(ACT, Canberra)",8.0,66.66,21.89,88.55


### Q3.

- Repeat the process done in Q2 <b><u>for all regions</u></b>.


- Only print out the final dataframe with the optimal lookbacks.

In [9]:
# Re-run the grid search on every column of the design set and record the
# best configuration found for each one.
regions = []
# The items are materialised into a list, which has a length tqdm can use
# for the progress total.
for region_label, region_series in tqdm.tqdm(list(df_design.items())):
    gs.fit(region_series)
    best_row = {'Region': region_label}
    best_row.update(gs.best_params)
    regions.append(best_row)

# Keep a copy of the per-region winners on disk.
with open('best_params.json', 'w') as fp:
    json.dump(regions, fp)

100%|██████████| 76/76 [18:54<00:00, 14.92s/it]


In [10]:
pd.DataFrame(regions).set_index('Region').drop(columns=['Horizon'])

Unnamed: 0_level_0,Lookback,Average RMSE,Stdev RMSE,Sum
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"(ACT, Canberra)",8.0,66.66,21.89,88.55
"(NSW, Blue Mountains)",4.0,40.43,9.73,50.16
"(NSW, Capital Country)",4.0,41.67,6.88,48.55
"(NSW, Central Coast)",7.0,38.12,5.5,43.61
"(NSW, Central NSW)",5.0,68.05,4.99,73.05
"(NSW, Hunter)",10.0,66.5,11.63,78.13
"(NSW, New England North West)",16.0,30.03,2.11,32.14
"(NSW, North Coast NSW)",11.0,129.4,11.69,141.1
"(NSW, Outback NSW)",8.0,25.37,1.6,26.98
"(NSW, Riverina)",8.0,33.89,5.67,39.55


### Q4.

- Retrain the LightGBM models on the full training set with their respective optimal lookbacks.


- Evaluate them on their test sets and compile the `MAE` and `RMSE` statistics in a dataframe then print them.

In [11]:
# Build the test-period forecasts from the per-region configurations in
# `regions`.
# NOTE(review): `forecastUsingConfig` is defined elsewhere; presumably it
# refits `est` on `df_design` per region with that region's best lookback and
# forecasts over the span of `df_test` -- confirm against its definition.
df_forecast = forecastUsingConfig(est, regions, df_design, df_test)

In [12]:
# Score the regional forecasts: `rateMyForecast` evaluates the metrics per
# column of the tables it is given (train, test, forecast).
rms_region = rateMyForecast(
    df_design,
    df_test,
    df_forecast)

Unnamed: 0_level_0,MAE,RMSE
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
"(ACT, Canberra)",140.75,154.26
"(NSW, Blue Mountains)",60.8,75.68
"(NSW, Capital Country)",54.24,63.71
"(NSW, Central Coast)",31.61,46.02
"(NSW, Central NSW)",87.58,103.34
"(NSW, Hunter)",195.06,207.98
"(NSW, New England North West)",52.92,58.1
"(NSW, North Coast NSW)",229.33,252.25
"(NSW, Outback NSW)",21.75,26.85
"(NSW, Riverina)",34.92,44.11


### Q5.

- Produce aggregate forecasts for the `State` and `Total` levels using the regional forecasts (i.e. bottom-up aggregation).


- Calculate the `MAE` and `RMSE` statistics and print them as a dataframe.

In [13]:
def _sum_by_state(frame):
    """Sum the columns of `frame` within each value of column level 0."""
    # `DataFrame.sum(level=...)` was deprecated in pandas 1.3 and removed in
    # 2.0. Grouping the transposed frame is the portable equivalent;
    # `sort=False` keeps the groups in their original column order, as the
    # old `level=` keyword did.
    return frame.T.groupby(level=0, sort=False).sum().T


# State level: bottom-up aggregation of the regional tables.
# NOTE(review): the regional evaluation passes `df_design` as the train table
# while this cell passes `df_train` -- confirm which one is intended.
rms_state = rateMyForecast(
    _sum_by_state(df_train),
    _sum_by_state(df_test),
    _sum_by_state(df_forecast))

# Total level: sum across every region into a single 'Total' column.
df_forecast_total = df_forecast.sum(axis=1).rename('Total').to_frame()
rms_total = rateMyForecast(
    df_train.sum(axis=1).rename('Total').to_frame(),
    df_test.sum(axis=1).rename('Total').to_frame(),
    df_forecast_total)

# Clear whatever the calls above displayed so only the combined table remains.
clear_output()
pd.concat([rms_total, rms_state])

Unnamed: 0_level_0,MAE,RMSE
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
Total,4608.4,4687.6
ACT,140.75,154.26
NSW,1220.04,1272.86
NT,143.6,173.27
QLD,836.33,884.42
SA,260.83,278.45
TAS,132.88,151.25
VIC,1230.51,1294.25
WA,653.45,674.67


### Q6.

- Calculate the average `MAE` and `RMSE` across all time series (i.e. all regions, states, and the total series).

In [14]:
# Stack the region, state and total score tables, average each metric column,
# and present the result as a single 'All Series' row.
pd.DataFrame(
    {'All Series': pd.concat([rms_region, rms_state, rms_total]).mean()}
).T

Unnamed: 0,MAE,RMSE
All Series,167.11,179.87


### Q7.

- Compare the forecasting performance of LightGBM to the ETS/ARIMA models in Assignment 3.


- Which performed better? Why is this the case?


### Answer.

<div class="alert alert-block alert-info">
    <table>
        <tr>
            <td></td>
            <td><b>MAE</b></td>
            <td><b>RMSE</b></td>
        </tr>
        <tr>
            <td>ETS/ARIMA</td>
            <td>83.58</td>
            <td>97.20</td>
        </tr>
        <tr>
            <td>LightGBM</td>
            <td>167.11</td>
            <td>179.87</td>
        </tr>
    </table>
    <ul>
        <li>The <code>ETS</code>/<code>ARIMA</code> implementation has been more rigorously tuned than the <code>LightGBM</code> implementation here. In the assignment I fit a total of ($104+144$) models per time series, but here we only did $12$.</li>
        <li>ML models such as GBM are slow to converge and need a lot of data. For econometric data such as this, <code>statsmodels</code> should be better.</li>
    </ul>
</div>

### Q8.

- What are some things we can do to improve forecast accuracy?

### Answer.

<div class="alert alert-block alert-info">
    <ol>
        <li>If we are sticking to <code>LightGBM</code>, here are our options:</li>
        <ul>
            <li>We can tune <code>LightGBM</code>'s hyperparameters. There are A LOT ($\gg10$). These ought to increase accuracy, but we should avoid overfitting.</li>
            <li>Use the <code>sklearn.multioutput.RegressorChain</code> wrapper instead of <code>MultiOutputRegressor</code>. <code>RegressorChain</code> feeds the output of each step's model as a feature to the next step when producing multi-step forecasts. This makes sense, as there is reason to believe that one step's output is correlated with the next.</li>
        </ul><br>
        <li>If we are concerned with the dataset:</li>
        <ul>
            <li><code>LightGBM</code> is capable of using exogenous variables, which can increase accuracy if such data is available.</li>
            <li>More data would be appreciated.</li>
        </ul><br>
        <li>Other things that can make the <strike>accuracy go brrr</strike> metrics better:</li>
        <ul>
            <li>Try other ML models like <code>XGBoost</code>, <code>CatBoost</code>, ensemble methods.</li>
            <li>Given enough data, we can try <code>RNN</code>-based approaches such as <code>LSTM</code> and <code>GRU</code>, and maybe even <code>transformers</code>.</li>
        </ul>
    </ol>
</div>