In [1]:
import torch
from torch import nn
import numpy as np
import pandas as pd
#23/05/2023 - done with the most basic form of MLP implementation, next: start tuning hyperparameters & improving model architecture

In [5]:
#Config Parameters
quantiles = [0.05, 0.10, 0.20, 0.30, 0.40, 0.60, 0.70, 0.80, 0.90, 0.95]
quantile_pairs = [(0.05, 0.95), (0.10, 0.90), (0.20, 0.80), (0.30, 0.70), (0.40, 0.60)]
batch_size, lr, n_epochs = 240, 0.001, 100
lag_period, num_features, forecast_horizon = 20, 14, 2
num_inputs, num_outputs, num_hidden = lag_period*num_features, (1+len(quantiles))*num_features, 400
i_train = 2400
i_val = i_train + 800
i_test = i_val + 1600 
#2400 for training, 800 for validation, 1600 for testing

In [6]:
#Load & Split Data
data = pd.read_csv("/Users/lixiang/Desktop/DeepJMQR Project/data.csv")
alldata = np.array(data)[:,1:].astype("float32")
torch.set_default_dtype(torch.float32)
X = []
for i in range(lag_period, len(data)):
    X.append(alldata[i-lag_period:i])
X = torch.tensor(np.array(X), requires_grad = True)
Y = alldata[lag_period+forecast_horizon:]

X_train, Y_train = X[:i_train], torch.tensor(Y[:i_train])
X_val, Y_val = X[i_train:i_val], Y[i_train:i_val]
X_test, Y_test = X[i_val:i_test], Y[i_val:i_test]
train_iter = torch.utils.data.DataLoader(list(zip(X_train,Y_train)), batch_size=batch_size, shuffle = True)
val_iter = torch.utils.data.DataLoader(list(zip(X_val,Y_val)), batch_size=batch_size, shuffle = False)

In [7]:
#Evaluation & Loss Functions
def tilted_loss(τ, y, ŷ):
    #inputs same shape as defined in lossfn, could be tensors/np.array
    j = len(τ)
    if len(ŷ.shape) == 1:
        y, ŷ = y.reshape(-1,j+1), ŷ.reshape(-1,j+1)
    loss = 0.0
    for i in range(j):
        q = τ[i]
        r = y[:, i+1] - ŷ[:, i+1]
        loss += sum(q*r - r*(r<0))
    return loss

def lossfn(τ, y, ŷ):
    #for ConvLSTM & MLP
    #τ: quantile vector of length J
    #y, ŷ: observation & prediction, tensors of dim M x (1+J) or 1D tensor of the form [μ q1 q2 q3...] x M
    j = len(τ)
    if len(ŷ.shape) == 1:
        y, ŷ = y.reshape(-1,j+1), ŷ.reshape(-1,j+1)
    loss = tilted_loss(τ, y, ŷ)
    loss += torch.sum(torch.square(y[:,0]-ŷ[:,0]))
    return loss

#for evaluation: remember to turn tensors into np.array()
def eval_quantiles(lower, upper, preds):
    #all inputs are np.array() of same length
    icp = np.mean((preds > lower) & (preds < upper))
    mil = np.mean(np.maximum(0,upper-lower))
    return icp,mil

def crossing_loss(τ,ŷ):
    #ŷ: np.array() of same shape as defined in lossfn
    j = len(τ)
    if len(ŷ.shape) == 1:
        ŷ = ŷ.reshape(-1,j+1)
    loss = 0.0 #crossing loss as defined in the paper
    num_cross = 0.0
    for i in range(len(ŷ[0,:])-2):
        q = ŷ[:,i+1] - ŷ[:,i+2]
        loss += np.sum(np.maximum(q,0))
        num_cross += np.sum(q>0)
    return loss, num_cross

def eval_error(y, ŷ):
    #y, ŷ: np.array() of same shape M x (1+J) or (1+J) vector
    r = np.abs(y-ŷ)
    mse = np.mean(r*r)
    rmse = np.sqrt(mse)
    mae = np.mean(r)
    return mse, rmse, mae


In [8]:
#Initialize Model
net = nn.Sequential(nn.Flatten(), nn.Linear(num_inputs,num_hidden), nn.ReLU(), nn.Linear(num_hidden, num_outputs))
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std = 0.01)
net.apply(init_weights)
optimizer = torch.optim.SGD(net.parameters(), lr = lr)
def train(model, train_iter, test_iter, quantiles, loss_fn, optimizer, num_epochs = 100):
    running_loss = 0
    last_loss = 0
    for i, data in enumerate(train_iter):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(quantiles, labels, outputs)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        last_loss = loss.item()
    return last_loss

In [9]:
#Train Model
best_vloss = 1e9
for epoch in range(n_epochs):
    net.train(True)
    avg_loss = train(net, train_iter, val_iter, quantiles, lossfn, optimizer, num_epochs = n_epochs)
    net.train(False)
    running_vloss = 0.0
    for i, vdata in enumerate(val_iter):
        vinputs, vlabels = vdata
        voutputs =  net(vinputs)
        vloss = lossfn(quantiles, vlabels, voutputs)
        running_vloss += vloss
    avg_vloss = float(running_vloss / (i+1))
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        torch.save(net.state_dict(), "model_best_state")
        print("epoch:",epoch+1,avg_loss, avg_vloss,"\n")

epoch: 1 39.2470703125 25.77938461303711 

epoch: 2 37.880794525146484 19.175527572631836 

epoch: 5 44.876529693603516 12.883186340332031 

epoch: 13 39.54420852661133 9.172379493713379 



In [10]:
#Evaluate Errors
model = nn.Sequential(nn.Flatten(), nn.Linear(num_inputs,num_hidden), nn.ReLU(), nn.Linear(num_hidden, num_outputs))
model.load_state_dict(torch.load("model_best_state"))
pred = model(X_val).detach().numpy().reshape(-1,num_features, 1+len(quantiles))
print("Crossing Loss:", crossing_loss(quantiles, pred))
err = []
tloss = 0.0
for i in range(1+len(quantiles)):
    err.append(eval_error(pred[:,:,i],Y_val))
    tloss += tilted_loss(quantiles, Y_val, pred[:,:,i])
for pairs in quantile_pairs:
    l,u = pairs
print("MSE:",np.mean([x[0] for x in err]), "RMSE:",np.mean([x[1] for x in err]),"MAE:", np.mean([x[2] for x in err]))
print("Tilted loss:",tloss)
#Test data will be touched after everything is done i.e. tuning hyperparameters & adjusting model architecture

Crossing Loss: (1933.4830589294434, 55200.0)
MSE: 0.0009297246 RMSE: 0.030343968 MAE: 0.026575156
Tilted loss: 1348.7804559631036
