## This notebook contains the new neural network that (1) removes three consecutive linear layers in the baseline model. (2) In addition, this model is trained with MAPELoss function instead of MSELoss function. (3) This model uses a learning rate scheduler with patience of 10. (4) Also, we add dropout layers to enhance robostness.

In [1]:
import torch
import torch.nn as nn
import torch.utils.data as data_utils
import torch.nn.functional as F
import numpy as np
import pandas as pd

In [2]:
def get_device():
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu') # don't have GPU 
    return device

# # convert a df to tensor to be used in pytorch
# def df_to_tensor(df):
#     device = get_device()
#     return torch.from_numpy(df.values).float().to(device)

In [3]:
data_path = "/data/workspace_files/"
vols = np.load(data_path + "12_12_sample_lognormal_vol.npy")
names = ["S", "T", "V_atm", "Beta", "Rho", "Volvol", "K"]

multiindex = pd.MultiIndex.from_product([range(i) for i in vols.shape],
                                        names=names
                                       )
full_df = pd.DataFrame(vols.reshape((-1,1)), index=multiindex, columns=["Lognormal_vol"]).reset_index()
print(full_df.shape)

# get features:
data_ranges = {'S': np.linspace(0.005+0.0, 0.07+0.03, num=12),
               'T': np.linspace(0.5, 20., num=5),
               'V_atm': np.linspace(0.001, 0.015, num=3),
               'Beta': np.linspace(0.1, 0.7, num=2),
               'Rho': np.linspace(-0.4, 0.4, num=3),
               'Volvol': np.linspace(0.0001, 0.5, num=5),
               'K': np.linspace(0.005+0.0, 0.07+0.03, num=12)
              }

for key in data_ranges.keys():
    full_df[key] = data_ranges[key][full_df[key]]

test_df = full_df.sample(frac=0.6, replace=False, random_state=1)
train_df = full_df.drop(test_df.index)
valid_df = train_df.sample(frac=0.25, replace=False, random_state=1)
train_df = train_df.drop(valid_df.index) # train: 30%, valid: 10%, test: 60% (sparse data)

train_target = torch.tensor(train_df[['Lognormal_vol']].values.astype(np.float32))
train_features = torch.tensor(train_df.drop('Lognormal_vol', axis = 1).values.astype(np.float32)) 
train_tensor = data_utils.TensorDataset(train_features, train_target) 
train_loader = data_utils.DataLoader(dataset = train_tensor, batch_size = 512, shuffle = True) # revised

valid_target = torch.tensor(valid_df[['Lognormal_vol']].values.astype(np.float32))
valid_features = torch.tensor(valid_df.drop('Lognormal_vol', axis = 1).values.astype(np.float32)) 
valid_tensor = data_utils.TensorDataset(valid_features, valid_target) 
valid_loader = data_utils.DataLoader(dataset = valid_tensor, batch_size = 512, shuffle = False) # revised

test_target = torch.tensor(test_df[['Lognormal_vol']].values.astype(np.float32))
test_features = torch.tensor(test_df.drop('Lognormal_vol', axis = 1).values.astype(np.float32)) 
test_tensor = data_utils.TensorDataset(test_features, test_target) # revised
test_loader = data_utils.DataLoader(dataset = test_tensor, batch_size = 512, shuffle = False) # revised

loaders = {"train": train_loader, "valid": valid_loader, "test": test_loader}

(64800, 8)


In [4]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(7, 16)
        self.fc2 = nn.Linear(16, 64)
        # self.fc3 = nn.Linear(32, 64)
        self.fc4 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(0.25)
    
    def forward(self, x):
        out = F.relu(self.fc1(x))
        out = F.relu(self.fc2(out))
        out = self.dropout(out)
        # out = F.relu(self.fc3(out))
        # out = self.dropout(out)
        return self.fc4(out)

In [5]:
def train(n_epochs, loaders, model, optimizer, criterion, scheduler, use_cuda, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    best_model = model
    valid_loss_min = np.Inf 
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## find the loss and update the model parameters accordingly
            ## record the average training loss, using something like
            ## train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_loss = train_loss + (1 / (batch_idx + 1)) * (loss.data - train_loss)

        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## update the average validation loss
            output = model(data)
            loss = criterion(output, target)
            valid_loss = valid_loss + (1 / (batch_idx + 1)) * (loss.data - valid_loss)
        
        scheduler.step(loss)

        # print training/validation statistics 
        if epoch % 10 == 0:
            print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
                epoch, 
                train_loss,
                valid_loss
                ))
        
        ## save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            print('Epoch {}: Validation loss decreased from {:.6f} to {:.6f}.'.format(epoch, valid_loss_min, valid_loss))
            valid_loss_min = valid_loss
            best_model = model
    if save_path is not None:
        torch.save(best_model.state_dict(), save_path)
    return best_model
    

In [6]:
data_path = "/data/workspace_files/"

In [7]:
def MAPELoss(output, target):
    loss = (torch.abs(output - target) / torch.abs(target)).mean()
    return loss

In [8]:
model = Net()

def weights_init(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight, gain=torch.nn.init.calculate_gain("linear"))
        m.bias.data.fill_(0)


model.apply(weights_init)

Net(
  (fc1): Linear(in_features=7, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
)

In [9]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2, last_epoch=-1)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True, threshold=0.0001, threshold_mode= 'rel', cooldown=0, eps=1e-08)
n_epochs = 100

# from sklearn.metrics import mean_absolute_percentage_error as MAPE
# criterion = MAPE
criterion = MAPELoss
# criterion = nn.MSELoss()
use_cuda = False

In [10]:
# train the model
model = train(n_epochs, loaders, model, optimizer, criterion, scheduler, use_cuda, data_path + 'nn_attempt.pt')

# load the model that got the best validation accuracy
# model.load_state_dict(torch.load(data_path + 'nn_attempt.pt'))

Epoch 1: Validation loss decreased from inf to 21.785204.
Epoch 2: Validation loss decreased from 21.785204 to 2.663019.
Epoch 4: Validation loss decreased from 2.663019 to 1.338873.
Epoch 5: Validation loss decreased from 1.338873 to 1.072735.
Epoch 6: Validation loss decreased from 1.072735 to 0.529367.
Epoch 7: Validation loss decreased from 0.529367 to 0.440211.
Epoch 8: Validation loss decreased from 0.440211 to 0.314261.
Epoch 9: Validation loss decreased from 0.314261 to 0.168549.
Epoch: 10 	Training Loss: 0.231294 	Validation Loss: 0.111899
Epoch 10: Validation loss decreased from 0.168549 to 0.111899.
Epoch 17: Validation loss decreased from 0.111899 to 0.111830.
Epoch 18: Validation loss decreased from 0.111830 to 0.099071.
Epoch 19: Validation loss decreased from 0.099071 to 0.081912.
Epoch: 20 	Training Loss: 0.173868 	Validation Loss: 0.082627
Epoch 27: Validation loss decreased from 0.081912 to 0.069057.
Epoch: 30 	Training Loss: 0.147403 	Validation Loss: 0.065618
Epoch 

In [11]:
from sklearn.metrics import mean_absolute_error as MAE, mean_squared_error as MSE, mean_absolute_percentage_error as MAPE

pred = model(test_features)

print(MAE(np.squeeze(pred.cpu().detach().numpy()), test_target))
print(MSE(np.squeeze(pred.cpu().detach().numpy()), test_target))
print(MAPE(np.squeeze(pred.cpu().detach().numpy()), test_target))

0.0056963167
0.00026712005
0.03936223


In [16]:
def run_n(n, model_params):
    # model_params['model']# do it just in case
    runs_mae = []
    runs_mse = []
    runs_mape = []
    for run in range(n):
        model = Net()
        model.apply(model_params['init_fn'])
        model = train(model_params['n_epochs'], model_params['loaders'], \
        model, torch.optim.Adam(model.parameters(), lr=0.001), model_params['criterion'], \
        torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True, threshold=0.0001, threshold_mode= 'rel', cooldown=0, eps=1e-08),\
        model_params['use_cuda'], None)

        pred = model(model_params['test_features'])
        runs_mae.append(MAE(np.squeeze(pred.cpu().detach().numpy()), model_params['test_target']))
        runs_mse.append(MSE(np.squeeze(pred.cpu().detach().numpy()), model_params['test_target']))
        runs_mape.append(MAPE(np.squeeze(pred.cpu().detach().numpy()), model_params['test_target']))
        
    print(f"mae mean: {np.mean(runs_mae)} std: {np.std(runs_mae)}")
    print(f"mse mean: {np.mean(runs_mse)} std: {np.std(runs_mse)}")
    print(f"mape mean: {np.mean(runs_mape)} std: {np.std(runs_mape)}")

In [18]:
model_params = {
    'train_loader': train_loader,
    'test_features': test_features,
    'test_target': test_target,
    'model': Net(),
    # 'optimizer': torch.optim.Adam(model.parameters(), lr=0.001),
    'n_epochs': 100,
    'criterion': MAPELoss,
    'init_fn': weights_init,
    'loaders': {"train": train_loader, "valid": valid_loader, "test": test_loader},
    'use_cuda': False,
    # 'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True, threshold=0.0001, threshold_mode= 'rel', cooldown=0, eps=1e-08)
}

run_n(30, model_params)

Epoch 1: Validation loss decreased from inf to 1.462438.
Epoch 2: Validation loss decreased from 1.462438 to 1.182286.
Epoch 3: Validation loss decreased from 1.182286 to 0.387316.
Epoch 4: Validation loss decreased from 0.387316 to 0.193426.
Epoch 5: Validation loss decreased from 0.193426 to 0.113042.
Epoch 6: Validation loss decreased from 0.113042 to 0.101003.
Epoch 9: Validation loss decreased from 0.101003 to 0.095122.
Epoch: 10 	Training Loss: 0.174444 	Validation Loss: 0.105714
Epoch 11: Validation loss decreased from 0.095122 to 0.085316.
Epoch 14: Validation loss decreased from 0.085316 to 0.075383.
Epoch 18: Validation loss decreased from 0.075383 to 0.074658.
Epoch 19: Validation loss decreased from 0.074658 to 0.066332.
Epoch: 20 	Training Loss: 0.139585 	Validation Loss: 0.055579
Epoch 20: Validation loss decreased from 0.066332 to 0.055579.
Epoch 22: Validation loss decreased from 0.055579 to 0.051207.
Epoch 26: Validation loss decreased from 0.051207 to 0.043232.
Epoch: