[Forecasting: Principles and Practice](https://otexts.com/fpp2/)

In [1]:
from graph_traffic.get_data import get_data
from graph_traffic.dataloading import npzDataset
from graph_traffic.baseline_models import MeanRegressor, MedianRegressor, RepeatRegressor, RepeatLastRegressor, DaytimeRegressor, DriftRegressor
import numpy as np
from sklearn.metrics import mean_squared_error
import pandas as pd

# Dataset request: three ids, sequence length 4, no graph, one-year-plus date
# window, stored under the name "small" with "intensidad" as the target.
data_dict = {
    "ids_list": [3954, 3973, 3978],
    "seq_len": 4,
    "with_graph": False,
    "from_date": "2019-03-01",
    "to_date": "2020-03-31",
    "dataset_name": "small",
    "target": "intensidad",
    "interactions": None,
}

# Every meteorological option is set to "drop"
# (presumably this excludes the variable — confirm against get_data).
meteo_dict = {
    "rain": "drop",
    "wind": "drop",
    "temperature": "drop",
    "humidity": "drop",
    "pressure": "drop",
    "radiation": "drop",
}

# Only "hour" and "working_day" are set to "passthrough"; all other temporal
# options are set to "drop".
temporal_dict = {
    "season": "drop",
    "month": "drop",
    "day_of_month": "drop",
    "hour": "passthrough",
    "minute": "drop",
    "bank_holiday": "drop",
    "school_holiday": "drop",
    "working_day": "passthrough",
}

# x and y are not referenced again in the cells shown here; the later cells
# read their arrays through npzDataset using dataset_name and n_points.
x, y = get_data(data_dict, meteo_dict, temporal_dict)
dataset_name = data_dict["dataset_name"]
n_points = None

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def _load_split(split):
    """Return the npzDataset for `split`, built with the notebook's dataset_name and n_points."""
    return npzDataset(dataset_name, split, n_points)


# One dataset object per split.
train_data = _load_split("train")
test_data = _load_split("test")
valid_data = _load_split("valid")

In [3]:
# Features are taken as stored. Targets keep only index 0 of the fourth axis;
# the index is passed as a list (assuming NumPy-style indexing, that keeps the
# axis with length 1 instead of dropping it).
target_idx = [0]

x_train, y_train = train_data.x, train_data.y[:, :, :, target_idx]
x_valid, y_valid = valid_data.x, valid_data.y[:, :, :, target_idx]
x_test, y_test = test_data.x, test_data.y[:, :, :, target_idx]

# 1. Mean regressor

In [17]:
# Fit MeanRegressor on the training split, then predict on all three splits.
mean_reg = MeanRegressor()
mean_reg.fit(x_train, y_train)

train_pred = mean_reg.predict(x_train)
valid_pred = mean_reg.predict(x_valid)
test_pred = mean_reg.predict(x_test)

# The figure printed as "MRE" is the mean absolute difference between
# prediction and target.
train_err = np.abs(train_pred - y_train).mean()
valid_err = np.abs(valid_pred - y_valid).mean()
test_err = np.abs(test_pred - y_test).mean()

print(f"Training MRE: {train_err:.2f}")
print(f"Validation MRE: {valid_err:.2f}")
print(f"Test MRE: {test_err:.2f}")

Training MRE: 230.10
Validation MRE: 233.64
Test MRE: 248.61


In [34]:
# Signed training residuals (prediction minus target), taken at index 0 of the
# third and fourth axes and flattened to 1-D; the cell displays their mean.
residuals = train_pred - y_train
res = residuals[:, :, 0, 0].ravel()
res.mean()

-6.509850945068991e-15

# 2. Median regressor

In [35]:
# Fit MedianRegressor on the training split, then predict on all three splits.
median_reg = MedianRegressor()
median_reg.fit(x_train, y_train)

train_pred = median_reg.predict(x_train)
valid_pred = median_reg.predict(x_valid)
test_pred = median_reg.predict(x_test)

# The figure printed as "MRE" is the mean absolute difference between
# prediction and target.
train_err = np.abs(train_pred - y_train).mean()
valid_err = np.abs(valid_pred - y_valid).mean()
test_err = np.abs(test_pred - y_test).mean()

print(f"Training MRE: {train_err:.2f}")
print(f"Validation MRE: {valid_err:.2f}")
print(f"Test MRE: {test_err:.2f}")

Training MRE: 225.26
Validation MRE: 229.88
Test MRE: 243.76


In [36]:
# Signed training residuals (prediction minus target), taken at index 0 of the
# third and fourth axes and flattened to 1-D; the cell displays their mean.
residuals = train_pred - y_train
res = residuals[:, :, 0, 0].ravel()
res.mean()

-90.29161565266529

# 3. Repeat regressor

In [37]:
# Fit RepeatRegressor on the training split, then predict on all three splits.
repeat_reg = RepeatRegressor()
repeat_reg.fit(x_train, y_train)

train_pred = repeat_reg.predict(x_train)
valid_pred = repeat_reg.predict(x_valid)
test_pred = repeat_reg.predict(x_test)

# The figure printed as "MRE" is the mean absolute difference between
# prediction and target.
train_err = np.abs(train_pred - y_train).mean()
valid_err = np.abs(valid_pred - y_valid).mean()
test_err = np.abs(test_pred - y_test).mean()

print(f"Training MRE: {train_err:.2f}")
print(f"Validation MRE: {valid_err:.2f}")
print(f"Test MRE: {test_err:.2f}")

Training MRE: 88.29
Validation MRE: 93.74
Test MRE: 91.22


In [38]:
# Signed training residuals (prediction minus target), taken at index 0 of the
# third and fourth axes and flattened to 1-D; the cell displays their mean.
residuals = train_pred - y_train
res = residuals[:, :, 0, 0].ravel()
res.mean()

0.7481399510265587

# 4. Repeat last value regressor

In [39]:
# Fit RepeatLastRegressor on the training split, then predict on all three splits.
repeatlast_reg = RepeatLastRegressor()
repeatlast_reg.fit(x_train, y_train)

train_pred = repeatlast_reg.predict(x_train)
valid_pred = repeatlast_reg.predict(x_valid)
test_pred = repeatlast_reg.predict(x_test)

# The figure printed as "MRE" is the mean absolute difference between
# prediction and target.
train_err = np.abs(train_pred - y_train).mean()
valid_err = np.abs(valid_pred - y_valid).mean()
test_err = np.abs(test_pred - y_test).mean()

print(f"Training MRE: {train_err:.2f}")
print(f"Validation MRE: {valid_err:.2f}")
print(f"Test MRE: {test_err:.2f}")

Training MRE: 67.80
Validation MRE: 71.52
Test MRE: 70.08


In [40]:
# Signed training residuals (prediction minus target), taken at index 0 of the
# third and fourth axes and flattened to 1-D; the cell displays their mean.
residuals = train_pred - y_train
res = residuals[:, :, 0, 0].ravel()
res.mean()

0.6298384818233189

# 5. Mean per hour

In [41]:
# DaytimeRegressor in "mean" mode, fitted on the training split.
# NOTE(review): fit receives only columns [0, 1] of x_train's fourth axis,
# while predict is given the unsliced arrays — confirm that
# DaytimeRegressor.predict is meant to be called this way.
daytimemean_reg = DaytimeRegressor("mean")
fit_features = x_train[:, :, :, [0, 1]]
daytimemean_reg.fit(fit_features, y_train)

train_pred = daytimemean_reg.predict(x_train)
valid_pred = daytimemean_reg.predict(x_valid)
test_pred = daytimemean_reg.predict(x_test)

# The figure printed as "MRE" is the mean absolute difference between
# prediction and target.
train_err = np.abs(train_pred - y_train).mean()
valid_err = np.abs(valid_pred - y_valid).mean()
test_err = np.abs(test_pred - y_test).mean()

print(f"Training MRE: {train_err:.2f}")
print(f"Validation MRE: {valid_err:.2f}")
print(f"Test MRE: {test_err:.2f}")

Training MRE: 169.47
Validation MRE: 174.00
Test MRE: 190.47


In [42]:
# Signed training residuals (prediction minus target), taken at index 0 of the
# third and fourth axes and flattened to 1-D; the cell displays their mean.
residuals = train_pred - y_train
res = residuals[:, :, 0, 0].ravel()
res.mean()

-112.61679602363321

In [43]:
# DaytimeRegressor in "mean" mode with by_working_day=True, fitted on the
# training split.
# NOTE(review): fit receives columns [0, 1, 2] of x_train's fourth axis, while
# predict is given the unsliced arrays — confirm that
# DaytimeRegressor.predict is meant to be called this way.
daytimeworkingdaymean_reg = DaytimeRegressor("mean", by_working_day=True)
fit_features = x_train[:, :, :, [0, 1, 2]]
daytimeworkingdaymean_reg.fit(fit_features, y_train)

train_pred = daytimeworkingdaymean_reg.predict(x_train)
valid_pred = daytimeworkingdaymean_reg.predict(x_valid)
test_pred = daytimeworkingdaymean_reg.predict(x_test)

# The figure printed as "MRE" is the mean absolute difference between
# prediction and target.
train_err = np.abs(train_pred - y_train).mean()
valid_err = np.abs(valid_pred - y_valid).mean()
test_err = np.abs(test_pred - y_test).mean()

print(f"Training MRE: {train_err:.2f}")
print(f"Validation MRE: {valid_err:.2f}")
print(f"Test MRE: {test_err:.2f}")

Training MRE: 134.16
Validation MRE: 138.43
Test MRE: 166.42


In [44]:
# Signed training residuals (prediction minus target), taken at index 0 of the
# third and fourth axes and flattened to 1-D; the cell displays their mean.
residuals = train_pred - y_train
res = residuals[:, :, 0, 0].ravel()
res.mean()

-103.60850535569408

# 6. Median per hour

In [45]:
# DaytimeRegressor in "median" mode, fitted on the training split.
# NOTE(review): fit receives only columns [0, 1] of x_train's fourth axis,
# while predict is given the unsliced arrays — confirm that
# DaytimeRegressor.predict is meant to be called this way.
daytimemedian_reg = DaytimeRegressor("median")
fit_features = x_train[:, :, :, [0, 1]]
daytimemedian_reg.fit(fit_features, y_train)

train_pred = daytimemedian_reg.predict(x_train)
valid_pred = daytimemedian_reg.predict(x_valid)
test_pred = daytimemedian_reg.predict(x_test)

# The figure printed as "MRE" is the mean absolute difference between
# prediction and target.
train_err = np.abs(train_pred - y_train).mean()
valid_err = np.abs(valid_pred - y_valid).mean()
test_err = np.abs(test_pred - y_test).mean()

print(f"Training MRE: {train_err:.2f}")
print(f"Validation MRE: {valid_err:.2f}")
print(f"Test MRE: {test_err:.2f}")

Training MRE: 170.99
Validation MRE: 174.97
Test MRE: 190.41


In [46]:
# Signed training residuals (prediction minus target), taken at index 0 of the
# third and fourth axes and flattened to 1-D; the cell displays their mean.
residuals = train_pred - y_train
res = residuals[:, :, 0, 0].ravel()
res.mean()

-161.31172301751744

In [47]:
# DaytimeRegressor in "median" mode with by_working_day=True, fitted on the
# training split.
# NOTE(review): fit receives columns [0, 1, 2] of x_train's fourth axis, while
# predict is given the unsliced arrays — confirm that
# DaytimeRegressor.predict is meant to be called this way.
daytimeworkingdaymedian_reg = DaytimeRegressor("median", by_working_day=True)
fit_features = x_train[:, :, :, [0, 1, 2]]
daytimeworkingdaymedian_reg.fit(fit_features, y_train)

train_pred = daytimeworkingdaymedian_reg.predict(x_train)
valid_pred = daytimeworkingdaymedian_reg.predict(x_valid)
test_pred = daytimeworkingdaymedian_reg.predict(x_test)

# The figure printed as "MRE" is the mean absolute difference between
# prediction and target.
train_err = np.abs(train_pred - y_train).mean()
valid_err = np.abs(valid_pred - y_valid).mean()
test_err = np.abs(test_pred - y_test).mean()

print(f"Training MRE: {train_err:.2f}")
print(f"Validation MRE: {valid_err:.2f}")
print(f"Test MRE: {test_err:.2f}")

Training MRE: 134.70
Validation MRE: 138.34
Test MRE: 166.91


In [48]:
# Signed training residuals (prediction minus target), taken at index 0 of the
# third and fourth axes and flattened to 1-D; the cell displays their mean.
residuals = train_pred - y_train
res = residuals[:, :, 0, 0].ravel()
res.mean()

-94.96779643058956

# 7. Drift regressor

In [50]:
# Fit DriftRegressor on the training split, then predict on all three splits.
drift_reg = DriftRegressor()
drift_reg.fit(x_train, y_train)

train_pred = drift_reg.predict(x_train)
valid_pred = drift_reg.predict(x_valid)
test_pred = drift_reg.predict(x_test)

# The figure printed as "MRE" is the mean absolute difference between
# prediction and target.
train_err = np.abs(train_pred - y_train).mean()
valid_err = np.abs(valid_pred - y_valid).mean()
test_err = np.abs(test_pred - y_test).mean()

print(f"Training MRE: {train_err:.2f}")
print(f"Validation MRE: {valid_err:.2f}")
print(f"Test MRE: {test_err:.2f}")

Training MRE: 78.15
Validation MRE: 82.71
Test MRE: 82.03


In [51]:
# Signed training residuals (prediction minus target), taken at index 0 of the
# third and fourth axes and flattened to 1-D; the cell displays their mean.
# `res` is kept under this name because the Ljung-Box cells below read it.
residuals = train_pred - y_train
res = residuals[:, :, 0, 0].ravel()
res.mean()

0.856373022226408

In [52]:
from statsmodels.stats.diagnostic import acorr_ljungbox

In [54]:
# Ljung-Box autocorrelation test on the flattened residual vector `res` with
# lags=5; the saved output lists lb_stat and lb_pvalue for lags 1 through 5.
acorr_ljungbox(res, lags=5)

Unnamed: 0,lb_stat,lb_pvalue
1,38331.365109,0.0
2,61634.589144,0.0
3,87264.089048,0.0
4,105711.441395,0.0
5,112263.17769,0.0


In [56]:
# Ljung-Box autocorrelation test on the flattened residual vector `res` with
# lags=10; the saved output lists lb_stat and lb_pvalue for lags 1 through 10.
acorr_ljungbox(res, lags=10)

Unnamed: 0,lb_stat,lb_pvalue
1,38331.365109,0.0
2,61634.589144,0.0
3,87264.089048,0.0
4,105711.441395,0.0
5,112263.17769,0.0
6,116983.880889,0.0
7,118493.435964,0.0
8,120959.158414,0.0
9,121208.738282,0.0
10,121724.623876,0.0
