In [1]:
import copy
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pandas as pd

import statsmodels.api as sm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

np.random.seed(1729)
torch.manual_seed(1729)

import sys
sys.path.append("")
sys.path.append("../../")

import preprocess

In [2]:
experiments = {
    "GLOBAL" : True,
    "YEARLY" : False,
    "QUARTERLY" : False,
    "MONTHLY" : False,
    "WEEKLY" : False,
    "DAILY" : False,
    "HOURLY" : False,
}

config = {
    "MASE_SCALE" : True,
    "MASE_SCALE_SEASONALITY" : True,
    "SCALE_SET" : "FULL" # FULL or SUB. Either the all the data or just the last 12 obs.
}

# Periods used in MASE scaling
periods = {
    "YEARLY"    : 1, 
    "QUARTERLY" : 4, 
    "MONTHLY"   : 12, 
    "WEEKLY"    : 52, 
    "DAILY"     : 7, 
    "HOURLY"    : 24
}

In [3]:
all_train_files = glob.glob("..\\data\\M4train\\*")
all_test_files = glob.glob("..\\data\\M4test\\*")
datasets = preprocess.preprocess_dataset(experiments=experiments, config=config, periods=periods, all_test_files=all_test_files, all_train_files=all_train_files)

Creating datasets for experiment GLOBAL.


Done. Created datasets for ['GLOBAL'].

Sizes of the datasets: 
GLOBAL    : (100000,  13), (100000,  6)




In [4]:
class M4Dataset(Dataset):
    """ Dataset for M4 AR(P) models """
    def __init__(self, arr):
        self.X = torch.from_numpy(arr[:,:-1])
        self.Y = torch.from_numpy(arr[:,-1])
    def __len__(self):
        return self.X.shape[0]
    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]

In [5]:
class MLP(nn.Module):
    """ N layer dense MLP """
    def __init__(self, memory, H=32):
        super(MLP, self).__init__()
        self.l1 = nn.Linear(memory, 1)
        #self.l2 = nn.Linear(H, H)
        #self.l3 = nn.Linear(H, H)
        #self.l4 = nn.Linear(H, H)
        #self.l5 = nn.Linear(H, 1)
        self.init_weights()
    def forward(self, x):
        #x = F.relu(self.l1(x))
        #x = F.relu(self.l2(x))
        #x = F.relu(self.l3(x))
        #x = F.relu(self.l4(x))
        #x = F.relu(self.l5(x))
        x = self.l1(x)
        return x
    def init_weights(self):
        nn.init.normal_(self.l1.weight, std=1e-3)

In [11]:
feedforward_models = {}
max_epochs = 10000
batch_size = 1024
tenacity = 7
for d in datasets.keys():
    # make datasetsx    
    X_train = copy.deepcopy(datasets[d][0])
    #np.random.shuffle(X_train)
    split = int(X_train.shape[0]*0.85)
    ds_train = M4Dataset(X_train[:split,:])
    ds_val = M4Dataset(X_train[split:,:])

    trainloader = DataLoader(ds_train, batch_size=1024, shuffle=True, num_workers=0)
    valloader = DataLoader(ds_val, batch_size=1024, shuffle=True, num_workers=0)

    # make model
    net = MLP(X_train.shape[1]-1, H=32).double()

    # train model
    criterion = nn.L1Loss()
    optimizer = optim.Adam(net.parameters(), lr=0.05)
    print(d)
    #print(f"Start Training {d}.")

    val_losses = list(np.ones(10)*np.inf) # initialize for early stop
    for epoch in range(1, max_epochs+1):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
        #if epoch%10==0:
            #print(f"Epoch {epoch:3>} loss: {running_loss}")
        print(f"Epoch {epoch:3>} loss: {running_loss}")

        # Early stop
        val_loss = 0
        tenacity_count = 0
        with torch.no_grad():
            for i, data in enumerate(valloader, 0):
                # get the inputs; data is a list of [inputs, labels]
                inputs, labels = data

                # forward + backward + optimize
                outputs = net(inputs)

                loss = criterion(outputs, labels)

                # print statistics
                val_loss += loss.item()
        
        # Early stop
        if epoch > tenacity + 1:
            if val_loss < min(val_losses[-tenacity:]):
                tenacity_count = 0
            elif epoch > tenacity:
                tenacity_count += 1
                print(tenacity_count)
        val_losses.append(val_loss)
        if tenacity_count >= tenacity:
            break
        print(f"Epoch {epoch:3>} loss: {val_loss}")

    #print(f"Finished Training {d}.")
    feedforward_models[d] = copy.deepcopy(net)

GLOBAL
Epoch 1 loss: 1500.2011872704886
Epoch 1 loss: 270.075060242223
Epoch 2 loss: 1393.3945276929944
Epoch 2 loss: 249.09172916341535
Epoch 3 loss: 1359.5647369165347
Epoch 3 loss: 238.53038210023408
Epoch 4 loss: 1342.8696911617972
Epoch 4 loss: 232.21471101370355
Epoch 5 loss: 1303.2067252887002
Epoch 5 loss: 243.1909735210323
Epoch 6 loss: 1292.3952779456047
Epoch 6 loss: 224.44589766966297
Epoch 7 loss: 1287.0534064376554
Epoch 7 loss: 220.2492115476961
Epoch 8 loss: 1289.6332733631477
Epoch 8 loss: 219.04286162870258
Epoch 9 loss: 1281.4007607568071
1
Epoch 9 loss: 240.2249775281866
Epoch 10 loss: 1289.2218842115426
1
Epoch 10 loss: 225.6916653363802
Epoch 11 loss: 1287.0443639444986
Epoch 11 loss: 216.26381262834286
Epoch 12 loss: 1294.158823536955
Epoch 12 loss: 216.18160836873207
Epoch 13 loss: 1284.607796048978
1
Epoch 13 loss: 254.51570638396421
Epoch 14 loss: 1280.2143146515168
1
Epoch 14 loss: 244.94261040285556
Epoch 15 loss: 1280.293171306835
1
Epoch 15 loss: 219.75279

KeyboardInterrupt: 

In [7]:
forecasts_feedforward = {}
mase_feedforward = {}

for model in feedforward_models.keys():
    print(model)
    net = feedforward_models[model]
    net.eval()
    # forecast
    X_train = copy.deepcopy(datasets[model][0])
    X_test = copy.deepcopy(datasets[model][1])
    Y_hat = []
    for i in range(X_test.shape[1]):
        if i == 0:
            X = X_train[:,i+1:]
        else:
            X = np.concatenate((X_train[:,(i+1):], X_test[:,:i]), axis=1)
        assert X.shape[1] == len(mod.params)
        X_tensor = torch.from_numpy(X)
        pred = net(X_tensor).detach().numpy().flatten()
        #print(pred.shape)
        assert pred.shape == (X_test.shape[0],)
        Y_hat.append(pred)

    forecasts_feedforward[model] = np.stack(Y_hat, axis=1)
    #print(forecasts_feedforward[model].shape)

    # calculate mase (mae since we have already scaled)
    error = np.mean(np.abs(forecasts_feedforward[model] - X_test))
    mase_feedforward[model] = error

df = pd.DataFrame({"Model": [e for e in experiments.keys() if experiments[e]], "MASE_LINEAR": [m for m in mase.values()], "MASE_FEEDFORWARD": [m for m in mase_feedforward.values()]})
print(df.head(10))

#display in nice table

NameError: name 'mase' is not defined