## (1) This notebook changes the patience of the learning-rate scheduler to 30. With a larger patience the scheduler waits longer before reducing the learning rate, so more epochs are needed to train the neural network. (2) We therefore increase the number of epochs to 300.

In [20]:
import torch
import torch.nn as nn
import torch.utils.data as data_utils
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error as MAE, mean_squared_error as MSE, mean_absolute_percentage_error as MAPE

In [21]:
def get_device():
    """Return the first CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

# # convert a df to tensor to be used in pytorch
# def df_to_tensor(df):
#     device = get_device()
#     return torch.from_numpy(df.values).float().to(device)

In [22]:
# Active ("trial") training configuration.
batch_size = 512       # mini-batch size handed to every DataLoader
n_epochs = 300         # number of training epochs passed to train()
learning_rate = 0.001  # initial learning rate for the Adam optimizer

# Alternative (larger) configuration, currently disabled:
# batch_size = 2048
# n_epochs = 1000
# learning_rate = 0.001

In [23]:
# Load the precomputed volatility grid; each array axis corresponds to one
# entry of `names` (same order).
data_path = "/data/workspace_files/"
vols = np.load(data_path + "12_12_sample_lognormal_vol.npy")
names = ["S", "T", "V_atm", "Beta", "Rho", "Volvol", "K"]

# Flatten the grid into a long table: one row per grid point, with the integer
# grid position along every axis stored in the column named after that axis.
multiindex = pd.MultiIndex.from_product([range(i) for i in vols.shape], names=names)
full_df = pd.DataFrame(
    vols.reshape((-1, 1)),
    index=multiindex,
    columns=["Lognormal_vol"],
).reset_index()

# Parameter values sampled along each axis; used to turn the integer grid
# positions into the actual feature values.
data_ranges = {
    'S': np.linspace(0.005+0.0, 0.07+0.03, num=12),
    'T': np.linspace(0.5, 20., num=5),
    'V_atm': np.linspace(0.001, 0.015, num=3),
    'Beta': np.linspace(0.1, 0.7, num=2),
    'Rho': np.linspace(-0.4, 0.4, num=3),
    'Volvol': np.linspace(0.0001, 0.5, num=5),
    'K': np.linspace(0.005+0.0, 0.07+0.03, num=12),
}

for key, grid in data_ranges.items():
    full_df[key] = grid[full_df[key]]

# Split (sparse-data setting): 60% test, then a quarter of the remaining 40%
# for validation, leaving train 30% / valid 10% / test 60%.
test_df = full_df.sample(frac=0.6, replace=False, random_state=1)
print(test_df.shape)
remaining_df = full_df.drop(test_df.index)
valid_df = remaining_df.sample(frac=0.25, replace=False, random_state=1)
train_df = remaining_df.drop(valid_df.index)


def _split_xy(df):
    """Return (features, target) float32 tensors; the target is the 'Lognormal_vol' column."""
    features = torch.tensor(df.drop('Lognormal_vol', axis=1).values.astype(np.float32))
    target = torch.tensor(df[['Lognormal_vol']].values.astype(np.float32))
    return features, target


# Only the training loader shuffles; validation and test keep row order.
train_features, train_target = _split_xy(train_df)
train_tensor = data_utils.TensorDataset(train_features, train_target)
train_loader = data_utils.DataLoader(train_tensor, batch_size=batch_size, shuffle=True)

valid_features, valid_target = _split_xy(valid_df)
valid_tensor = data_utils.TensorDataset(valid_features, valid_target)
valid_loader = data_utils.DataLoader(valid_tensor, batch_size=batch_size, shuffle=False)

test_features, test_target = _split_xy(test_df)
test_tensor = data_utils.TensorDataset(test_features, test_target)
test_loader = data_utils.DataLoader(test_tensor, batch_size=batch_size, shuffle=False)

loaders = {"train": train_loader, "valid": valid_loader, "test": test_loader}

(38880, 8)


In [24]:
class Net(nn.Module):
    """Small fully connected regressor: 7 inputs -> 16 -> 64 -> 1 output.

    ReLU follows each of the two hidden layers; dropout (p=0.25) is applied
    to the second hidden activation before the linear output layer.
    """

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys. There is no `fc3`:
        # the output layer is deliberately still called `fc4`.
        self.fc1 = nn.Linear(7, 16)
        self.fc2 = nn.Linear(16, 64)
        self.fc4 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        """Map a batch of feature rows `x` to one prediction per row."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.fc4(self.dropout(hidden))

In [25]:
def train(n_epochs, loaders, model, optimizer, criterion, scheduler, use_cuda, save_path):
    """Train `model` and return it carrying its best-validation weights.

    Args:
        n_epochs: number of passes over ``loaders['train']``.
        loaders: mapping with 'train' and 'valid' entries, each an iterable
            yielding ``(data, target)`` batches.
        model: the ``nn.Module`` to optimise (updated in place).
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: callable ``criterion(output, target)`` returning a scalar
            loss tensor.
        scheduler: object whose ``step(metric)`` is called once per epoch with
            the epoch's mean validation loss (e.g. ``ReduceLROnPlateau``).
        use_cuda: when true, every batch is moved to the GPU with ``.cuda()``.
            The model itself is not moved here; the caller must already have
            placed it on the matching device.
        save_path: file to which the best ``state_dict`` is written with
            ``torch.save``, or ``None`` to skip saving.

    Returns:
        The same ``model`` object, with the weights from the epoch that reached
        the lowest mean validation loss. If no epoch improved on the initial
        ``inf`` (e.g. ``n_epochs == 0`` or NaN losses), the weights are left as
        they are at the end of training.
    """
    # Local import keeps this function self-contained.
    import copy

    # Snapshot of the best weights seen so far. Keeping only a reference to
    # `model` would not work: the model keeps training, so the "best" model
    # would silently become the last one.
    best_state = None
    valid_loss_min = float('inf')

    for epoch in range(1, n_epochs + 1):
        # running means of the per-batch losses for this epoch
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # incremental mean: new_mean = mean + (x - mean) / count
            train_loss = train_loss + (1 / (batch_idx + 1)) * (loss.item() - train_loss)

        ######################
        # validate the model #
        ######################
        model.eval()
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                loss = criterion(output, target)
                valid_loss = valid_loss + (1 / (batch_idx + 1)) * (loss.item() - valid_loss)

        # Drive the scheduler with the epoch's mean validation loss, not with
        # whichever batch happened to be evaluated last.
        scheduler.step(valid_loss)

        # print training/validation statistics
        if epoch % 10 == 0:
            print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
                epoch,
                train_loss,
                valid_loss
                ))

        # remember the weights whenever the validation loss improves
        if valid_loss < valid_loss_min:
            print('Epoch {}: Validation loss decreased from {:.6f} to {:.6f}.'.format(epoch, valid_loss_min, valid_loss))
            valid_loss_min = valid_loss
            best_state = copy.deepcopy(model.state_dict())

    # Restore the best snapshot so the returned and saved weights are the best
    # ones rather than those of the final epoch.
    if best_state is not None:
        model.load_state_dict(best_state)
    if save_path is not None:
        torch.save(model.state_dict(), save_path)
    return model
    

In [26]:
# Directory prefix for the model checkpoints and prediction file written below
# (same value as the one used when loading the input data).
data_path = "/data/workspace_files/"

In [27]:
def MAPELoss(output, target):
    """Mean absolute percentage error: mean of |output - target| / |target|.

    No guard is applied for zero targets; such entries yield inf or nan.
    """
    relative_error = torch.abs((output - target) / target)
    return relative_error.mean()

def MAXLoss(output, target):
    """Largest absolute error between `output` and `target` over all elements."""
    return (output - target).abs().max()

def MAPEMAXLoss(output, target, alpha=1.0, beta=1.0):
    """Weighted sum of the two losses: alpha * MAPELoss + beta * MAXLoss."""
    mape_term = alpha * MAPELoss(output, target)
    max_term = beta * MAXLoss(output, target)
    return mape_term + max_term

In [28]:
# model = Net()

# def weights_init(m):
#     if isinstance(m, nn.Linear):
#         torch.nn.init.xavier_uniform_(m.weight, gain=torch.nn.init.calculate_gain("linear"))
#         m.bias.data.fill_(0)


# model.apply(weights_init)

In [29]:
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2, last_epoch=-1)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True, threshold=0.0001, threshold_mode= 'rel', cooldown=0, eps=1e-08)

# criterion = MAPEMAXLoss
# # criterion = nn.MSELoss()
# use_cuda = False

In [30]:
# # train the model
# model = train(n_epochs, loaders, model, optimizer, criterion, scheduler, use_cuda, data_path + 'nn_attempt.pt')

# # load the model that got the best validation accuracy
# # model.load_state_dict(torch.load(data_path + 'nn_attempt.pt'))

In [31]:
# pred = model(test_features)

# print(MAE(np.squeeze(pred.cpu().detach().numpy()), test_target))
# print(MSE(np.squeeze(pred.cpu().detach().numpy()), test_target))
# print(MAPE(np.squeeze(pred.cpu().detach().numpy()), test_target))

In [32]:
# # Stability Analysis

# def run_n(n, model_params):
#     # model_params['model']# do it just in case
#     runs_mae = []
#     runs_mse = []
#     runs_mape = []
#     for run in range(n):
#         model = Net()
#         model.apply(model_params['init_fn'])
#         model = train(model_params['n_epochs'], model_params['loaders'], \
#         model, torch.optim.Adam(model.parameters(), lr=0.001), model_params['criterion'], \
#         torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True, threshold=0.0001, threshold_mode= 'rel', cooldown=0, eps=1e-08),\
#         model_params['use_cuda'], None)

#         pred = model(model_params['test_features'])
#         runs_mae.append(MAE(np.squeeze(pred.cpu().detach().numpy()), model_params['test_target']))
#         runs_mse.append(MSE(np.squeeze(pred.cpu().detach().numpy()), model_params['test_target']))
#         runs_mape.append(MAPE(np.squeeze(pred.cpu().detach().numpy()), model_params['test_target']))
        
#     print(f"mae mean: {np.mean(runs_mae)} std: {np.std(runs_mae)}")
#     print(f"mse mean: {np.mean(runs_mse)} std: {np.std(runs_mse)}")
#     print(f"mape mean: {np.mean(runs_mape)} std: {np.std(runs_mape)}")

In [33]:
# # Stability Analysis

# model_params = {
#     'train_loader': train_loader,
#     'test_features': test_features,
#     'test_target': test_target,
#     'model': Net(),
#     # 'optimizer': torch.optim.Adam(model.parameters(), lr=0.001),
#     'n_epochs': n_epochs,
#     'criterion': MAPEMAXLoss,
#     'init_fn': weights_init,
#     'loaders': {"train": train_loader, "valid": valid_loader, "test": test_loader},
#     'use_cuda': False,
#     # 'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True, threshold=0.0001, threshold_mode= 'rel', cooldown=0, eps=1e-08)
# }

# run_n(5, model_params) # trial
# # run_n(30, model_params)

In [34]:
def weights_init(m):
    """Initialise an ``nn.Linear`` layer in place; other module types are left untouched.

    Weights are drawn with Xavier-uniform initialisation using the gain
    PyTorch reports for a "linear" nonlinearity, and the bias is set to zero.
    Intended to be passed to ``model.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    linear_gain = torch.nn.init.calculate_gain("linear")
    torch.nn.init.xavier_uniform_(m.weight, gain=linear_gain)
    m.bias.data.zero_()

In [35]:
# Work on a copy of the full grid and flag every row that is not part of the
# test split. Validation rows are therefore flagged True as well.
output_df = full_df.copy()
output_df["is_train"] = ~output_df.index.isin(test_df.index)

# output_df["pred"] = model(torch.Tensor(full_df[names].values)).detach().numpy()

In [36]:
# Train one freshly initialised network per loss function, write its
# checkpoint next to the data, and store its predictions for every grid point
# in a dedicated `pred_<loss_name>` column of `output_df`.
for loss_name, loss_func in (
    ("mapeloss", MAPELoss),
    ("maxloss", MAXLoss),
    ("mixedloss", MAPEMAXLoss),
):
    print(f"\nTRAIN MODEL USING {loss_name.upper()} LOSS FUNCTION")

    model = Net()
    model.apply(weights_init)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # patience=30: the learning rate is cut by `factor` only after the
    # monitored loss has failed to improve for more than 30 consecutive epochs
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.1,
        patience=30,
        verbose=True,
        threshold=0.0001,
        threshold_mode='rel',
        cooldown=0,
        eps=1e-08,
    )
    criterion = loss_func
    use_cuda = False

    checkpoint_path = data_path + f'nn_{loss_name}.pt'
    model = train(n_epochs, loaders, model, optimizer, criterion, scheduler, use_cuda, checkpoint_path)

    # predict on the whole grid (train, validation and test rows alike)
    all_features = torch.Tensor(full_df[names].values)
    output_df[f"pred_{loss_name}"] = model(all_features).detach().numpy()

print("\nFINISHED")


TRAIN MODEL USING MAPELOSS LOSS FUNCTION
Epoch 1: Validation loss decreased from inf to 5.424998.
Epoch 2: Validation loss decreased from 5.424998 to 5.348693.
Epoch 3: Validation loss decreased from 5.348693 to 4.400710.
Epoch 4: Validation loss decreased from 4.400710 to 1.170526.
Epoch 5: Validation loss decreased from 1.170526 to 1.001789.
Epoch 6: Validation loss decreased from 1.001789 to 0.483139.
Epoch 7: Validation loss decreased from 0.483139 to 0.250517.
Epoch 8: Validation loss decreased from 0.250517 to 0.193512.
Epoch 9: Validation loss decreased from 0.193512 to 0.160922.
Epoch: 10 	Training Loss: 0.215806 	Validation Loss: 0.149590
Epoch 10: Validation loss decreased from 0.160922 to 0.149590.
Epoch 11: Validation loss decreased from 0.149590 to 0.137906.
Epoch 12: Validation loss decreased from 0.137906 to 0.128576.
Epoch 13: Validation loss decreased from 0.128576 to 0.110631.
Epoch 14: Validation loss decreased from 0.110631 to 0.098835.
Epoch 18: Validation loss de

In [37]:
# Preview the first rows: features, target, split flag and the three prediction columns.
output_df.head()

Unnamed: 0,S,T,V_atm,Beta,Rho,Volvol,K,Lognormal_vol,is_train,pred_mapeloss,pred_maxloss,pred_mixedloss
0,0.005,0.5,0.001,0.1,-0.4,0.0001,0.005,0.19802,False,0.194161,0.22155,0.205326
1,0.005,0.5,0.001,0.1,-0.4,0.0001,0.013636,0.195679,True,0.192433,0.214536,0.203053
2,0.005,0.5,0.001,0.1,-0.4,0.0001,0.022273,0.193395,False,0.190705,0.208635,0.20078
3,0.005,0.5,0.001,0.1,-0.4,0.0001,0.030909,0.191166,False,0.188978,0.203251,0.198506
4,0.005,0.5,0.001,0.1,-0.4,0.0001,0.039545,0.18899,True,0.18725,0.200768,0.196233


In [39]:
# Persist the table of targets and per-loss predictions for this patience=30 run.
output_df.to_pickle(data_path + "nn_pred_data_30patience.pkl")