In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
from sklearn import metrics

import paper_utils as pu

# Run the plotting setup helper from paper_utils before any figure is drawn.
# NOTE(review): presumably this configures shared matplotlib styling for the
# paper's figures - confirm in paper_utils.
pu.set_plots()

# Load data

In [None]:
# Load every project listed in pu.PROJECTS. For projects whose
# pu.choose_weekend entry is falsy, keep only the Monday-Friday rows.
# NOTE(review): assumes each loaded frame has a DatetimeIndex - confirm in pu.load.
data = {}
for project in pu.PROJECTS:
    frame = pu.load(project)
    if not pu.choose_weekend[project]:
        # index.weekday numbers Monday as 0, so 0-4 selects the weekdays.
        on_weekday = frame.index.weekday.isin(range(5))
        frame = frame.loc[on_weekday, :].copy(deep=True)
    data[project] = frame

In [None]:
# Ten August 2021 dates (ISO "YYYY-MM-DD" strings) that make up the test set.
# Only weekdays are listed, and 2021-08-16 is not among them (the reason is
# not visible in this notebook).
_test_days_of_august = (3, 4, 5, 6, 9, 10, 11, 12, 13, 17)
test_dates = [f"2021-08-{day:02d}" for day in _test_days_of_august]

In [None]:
# Sanity check: print, per project, how many rows fall on or before the
# 2021-08-02 cut-off (the slice end used for the training data).
for frame in data.values():
    n_rows_before_cutoff = len(frame.loc[:"2021-08-02"])
    print(n_rows_before_cutoff)

In [None]:
# Split each project's frame into a held-out test set and a training pool.
test_sz = 10  # not referenced in the cells visible here; kept for any later use

# Test set: exactly the rows labelled by the dates in test_dates.
test_data = {p: data[p].loc[test_dates].copy(deep=True) for p in data}

# Training pool: rows up to the 2021-08-02 label (label slicing includes the
# end point), capped at the first 34 rows.
train_data = {
    p: data[p].loc[:"2021-08-02"].iloc[:34].copy(deep=True) for p in data
}

In [None]:
# For every project, fit one model per training-set size n (the first n
# training rows, n = 10 .. len(train) - 1), predict the test set and record
# the MAPE. All three results are nested dicts: result[project][n].
models = {name: {} for name in data}
preds = {name: {} for name in data}
scores = {name: {} for name in data}

for p in train_data:
    weekend = pu.choose_weekend[p]
    train_frame = train_data[p]
    for n in range(10, len(train_frame)):
        fitted = pu.create_model(train_frame.iloc[:n], weekend=weekend)
        models[p][n] = fitted
        # NOTE(review): pu.predict's result is unpacked straight into the
        # metric, so it is presumably a (y_true, y_pred) pair - confirm.
        prediction = pu.predict(fitted, test_data[p], weekend=weekend)
        preds[p][n] = prediction
        scores[p][n] = metrics.mean_absolute_percentage_error(*prediction)

# Portfolio-level error: for each training-set size n, add up the two
# prediction components across all projects and score the summed series.
# NOTE(review): each preds[p][n] is presumably a (y_true, y_pred) pair as
# returned by pu.predict - confirm.
total_preds = {}
total_scores = {}

# Iterate over the training sizes that *every* project has a prediction for.
# This avoids depending on a loop variable `p` left over from an earlier loop,
# which would tie the range to whichever project happened to be iterated last
# and fail with a KeyError if another project had fewer training rows.
_sizes_per_project = [set(preds[p]) for p in preds]
_common_sizes = (
    sorted(set.intersection(*_sizes_per_project)) if _sizes_per_project else []
)

for n in _common_sizes:
    total_preds[n] = (
        sum(preds[p][n][0] for p in preds),
        sum(preds[p][n][1] for p in preds),
    )
    total_scores[n] = metrics.mean_absolute_percentage_error(*total_preds[n])

In [None]:
# Figure 6: test-set MAPE as a function of training-set size, one line per
# project plus a dashed black line for the whole portfolio.
scores = pd.DataFrame(scores)  # columns = projects, index = training sizes
f, ax = plt.subplots(figsize=(pu.PAGE_WIDTH / 2, 0.75 * pu.ROW_HEIGHT))

for p, project_scores in scores.items():
    ax.plot(project_scores * 100, label=p)  # scale the score to percent
portfolio_scores = pd.Series(total_scores) * 100
ax.plot(portfolio_scores, label="Portfolio", color="k", ls="--")

ax.set(
    xlabel="Number of observations in training set",
    ylabel="Test set MAPE (%)",
)
ax.set_ylim(bottom=0)
# Anchor the legend's centre-left point at the right edge of the axes so it
# sits outside the plotting area.
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
f.tight_layout()

if pu.save_fig:
    f.savefig(pu.fig_path / "Figure 6.pdf")