In [22]:
import numpy as np
import pandas as pd
import random
import torch
from torch.utils.data import DataLoader
from ivyspt.input_processing import split_surfaces, IVSurfaceDataset
from ivyspt.ivyspt import IvySPT
from ivyspt.market_sensitivity import MarketSensitivityAnalysis
from ivyspt.convergence_ntk import ConvergenceNTK
from ivyspt.loss_flatness import OptimalLossFlatness


# Set the random seed for reproducibility
RANDOM_STATE = 0
N_JOBS = 8
torch.manual_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)

In [20]:
pre_train_data = pd.read_csv('data/pre_train_data.csv', parse_dates=True, index_col=[0, 1], date_format="ISO8601")
fine_tune_data = pd.read_csv('data/fine_tune_data.csv', parse_dates=True, index_col=[0, 1], date_format="ISO8601")
pre_train_surfaces_train, pre_train_surfaces_validation, pre_train_surfaces_test = split_surfaces(
    pre_train_data,
    toy_sample=True,
    max_points=20,
    max_surfaces=5,
    random_state=RANDOM_STATE
)
fine_tune_surfaces_train, fine_tune_surfaces_validation, fine_tune_surfaces_test = split_surfaces(
    fine_tune_data,
    toy_sample=True,
    max_points=20,
    max_surfaces=5,
    random_state=RANDOM_STATE
)

In [3]:
hyperparameters = {
    'Input Preprocessing' : {
        'Mask Proportions' : [0.1, 0.3, 0.5, 0.7],
        'Number of Query Points' : 1,
        'Batch Size' : 5
    },
    'Surface Embedding' : {
        'Embedding Dimension' : 64,
    },
    'Surface Encoding' : {
        'Number of Heads' : 4,
        'FFN Hidden Dimension' : 256,
        'Attention Dropout' : 0.,
        'Gate Dropout' : 0.,
        'FFN Dropout' : 0.,
        'Number of Blocks' : 4,
        'External Feature Dimension' : 5,
        'Weight Initializer Std.' : 0.02,
        'Linear Bias Initializer' : 0.0,
        'Gate Bias Inititalizer' : 10.0
    },
    'Adaptive Loss Weights' : {
        'Asymmetry' : 1.5,
    },
    'Trainer' : {
        'Pre-Train' : {
            'Number of Epochs' : 20,
            'Warmup Ratio' : 0.15,
            'Peak Learning Rate' : 1e-3,
            'Minimal Learning Rate' : 1e-5,
            'Gradient Clipping' : 1,
            'Adam Betas' : (0.9, 0.999),
            'Adam Epsilon' : 1e-8,
            'Adam Weight Decay' : 0.01,
            'Layer-Wise Decay' : None,
        },
        'Fine-Tune' : {
            'Number of Epochs' : 10,
            'Warmup Ratio' : 0.1,
            'Peak Learning Rate' : 1e-3,
            'Minimal Learning Rate' : 1e-6,
            'Gradient Clipping' : 0,
            'Adam Betas' : (0.9, 0.999),
            'Adam Epsilon' : 1e-8,
            'Adam Weight Decay' : 0.01,
            'Layer-Wise Decay' : 0.9,
        }
    }
}

In [21]:
pre_train_dataset_train = IVSurfaceDataset(
    pre_train_surfaces_train, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_train = DataLoader(
    pre_train_dataset_train, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
pre_train_dataset_validation = IVSurfaceDataset(
    pre_train_surfaces_validation, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_validation = DataLoader(
    pre_train_dataset_validation, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
pre_train_dataset_test = IVSurfaceDataset(
    pre_train_surfaces_test, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_test = DataLoader(
    pre_train_dataset_test, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)

In [5]:
fine_tune_dataset_train = IVSurfaceDataset(
    fine_tune_surfaces_train, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
fine_tune_data_loader_train = DataLoader(
    fine_tune_dataset_train, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
fine_tune_dataset_validation = IVSurfaceDataset(
    fine_tune_surfaces_validation, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
fine_tune_data_loader_validation = DataLoader(
    fine_tune_dataset_validation, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
fine_tune_dataset_test = IVSurfaceDataset(
    fine_tune_surfaces_test, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
fine_tune_data_loader_test = DataLoader(
    fine_tune_dataset_test, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)

In [6]:
torch.manual_seed(RANDOM_STATE)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
model_pre_train = IvySPT(
    hyperparameters['Surface Embedding']['Embedding Dimension'], 
    hyperparameters['Surface Encoding']['Number of Blocks'],
    hyperparameters['Surface Encoding']['Number of Heads'], 
    hyperparameters['Surface Encoding']['FFN Hidden Dimension'],
    hyperparameters['Surface Encoding']['Attention Dropout'], 
    hyperparameters['Surface Encoding']['Gate Dropout'],
    hyperparameters['Surface Encoding']['FFN Dropout'],
    hyperparameters['Surface Encoding']['External Feature Dimension'],
    hyperparameters['Surface Encoding']['Weight Initializer Std.'],
    hyperparameters['Surface Encoding']['Linear Bias Initializer'],
    hyperparameters['Surface Encoding']['Gate Bias Inititalizer']
)

In [7]:
MarketSensitivityAnalysis(model_pre_train, pre_train_data_loader_validation, device).compute_sensitivity()    

Unnamed: 0,Market Return,Market Volatility,Treasury Rate,TV Mean,TV Std.
0,5.300791e-09,-3.680444e-09,2.708491e-09,5.486312e-09,-4.387399e-11
1,4.57936e-09,-3.936594e-09,3.121626e-09,4.651789e-09,1.302495e-09
2,5.19896e-09,-3.705896e-09,2.70765e-09,5.172908e-09,2.300866e-10
3,4.730379e-09,-3.505497e-09,3.177272e-09,4.92785e-09,1.728902e-10
4,5.275354e-09,-3.869429e-09,2.777071e-09,5.058292e-09,1.998642e-10
5,5.591125e-09,-4.460486e-09,2.974249e-09,4.820414e-09,8.651481e-10
6,5.276457e-09,-3.803585e-09,2.642152e-09,5.224385e-09,5.882288e-11
7,4.370255e-09,-4.226236e-09,2.977057e-09,4.424542e-09,1.443153e-09
8,5.623057e-09,-4.194531e-09,3.131289e-09,5.03372e-09,1.005484e-09
9,5.498242e-09,-4.028611e-09,2.708284e-09,5.168156e-09,1.271045e-10


In [9]:
OptimalLossFlatness(model_pre_train, pre_train_data_loader_validation, device).calculate_flatness()    

(tensor([[-1.5917e-04,  9.1110e-03,  1.0178e-02,  2.5670e-02,  1.1351e-02,
           1.9972e-02, -2.6299e-02, -3.0845e-02, -3.9019e-02, -4.3398e-02,
          -5.7609e-02, -1.2017e-02, -4.2323e-02, -8.4755e-03, -3.6993e-02,
          -2.4207e-02, -6.0259e-02, -4.5890e-02, -4.0323e-02, -5.0224e-03,
          -2.8772e-02, -3.9266e-02, -4.4171e-02,  1.2904e-02, -2.2278e-02,
          -2.0048e-02, -3.4520e-02, -7.5087e-03, -9.0928e-03,  1.7408e-02,
          -1.2067e-02, -1.6023e-02, -3.0639e-02,  2.2428e-04, -2.6769e-02,
           5.5081e-03, -9.9355e-03,  6.4645e-03, -5.0180e-03,  1.3802e-02,
          -1.2159e-02,  2.2248e-02, -1.2435e-02,  1.4962e-02,  2.4553e-03,
           4.9902e-02, -2.1854e-02, -1.0599e-03, -6.1956e-03,  3.8186e-02,
           2.3587e-02,  3.3872e-02,  1.0321e-02,  3.1012e-02,  2.5622e-02,
           3.4977e-02,  1.7726e-02,  5.7327e-02,  5.0149e-02,  1.0153e-01,
           1.3832e-02,  4.3278e-02,  6.9490e-02,  5.9584e-02,  1.8566e-02],
         [-3.0192e-03, -

In [19]:
ConvergenceNTK(model_pre_train, pre_train_data_loader_validation, device).calculate_ntk_eigenvalues()

tensor([-6.1202e-05, -4.7568e-05, -4.2913e-05, -3.6620e-05, -2.3285e-05,
        -1.7199e-05, -1.3334e-05, -7.6005e-06, -6.1810e-06,  1.7723e-06,
         5.0658e-06,  1.0935e-05,  1.6904e-05,  1.9331e-05,  2.7516e-05,
         3.3941e-05,  4.5298e-05,  5.1175e-05,  5.4233e-05,  5.7227e-05,
         6.2577e-05,  7.9743e-05,  9.3287e-05,  1.3859e-04,  1.8095e-04,
         2.0091e-04,  2.3962e-04,  3.0984e-04,  4.5091e-04,  5.9554e-04,
         8.3025e-04,  9.7705e-04,  2.0120e-03,  3.7965e-03,  4.4103e-03,
         9.8362e-03,  4.6224e-02,  5.0096e-02,  9.1300e-02,  1.6152e-01,
         4.2422e-01,  7.7977e-01,  1.6154e+00,  2.0385e+00,  4.5672e+00,
         6.2726e+00,  2.4490e+01,  5.2211e+01,  6.2503e+02,  2.5357e+03])