In [1]:
import numpy as np
import pandas as pd
import random
import copy
import torch
from torch.utils.data import DataLoader
from ivyspt.input_processing import split_surfaces, IVSurfaceDataset
from ivyspt.trainer import Trainer
from ivyspt.ivyspt import IvySPT

# Set the random seed for reproducibility
RANDOM_STATE = 0
N_JOBS = 8
torch.manual_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)

In [2]:
pre_train_data = pd.read_csv('data/pre_train_data.csv', parse_dates=True, index_col=[0, 1], date_format="ISO8601")
fine_tune_data = pd.read_csv('data/fine_tune_data.csv', parse_dates=True, index_col=[0, 1], date_format="ISO8601")

In [3]:
# pre_train_surfaces_train, pre_train_surfaces_validation, pre_train_surfaces_test = split_surfaces(
#     pre_train_data,
#     toy_sample=True,
#     max_points=400,
#     max_surfaces=400,
#     random_state=RANDOM_STATE
# )
# fine_tune_surfaces_train, fine_tune_surfaces_validation, fine_tune_surfaces_test = split_surfaces(
#     fine_tune_data,
#     toy_sample=True,
#     max_points=400,
#     max_surfaces=400,
#     random_state=RANDOM_STATE
# )

In [4]:
# pre_train_surfaces_train, pre_train_surfaces_validation, pre_train_surfaces_test = split_surfaces(
#     pre_train_data,
#     toy_sample=True,
#     max_points=200,
#     max_surfaces=200,
#     random_state=RANDOM_STATE
# )
# fine_tune_surfaces_train, fine_tune_surfaces_validation, fine_tune_surfaces_test = split_surfaces(
#     fine_tune_data,
#     toy_sample=True,
#     max_points=200,
#     max_surfaces=200,
#     random_state=RANDOM_STATE
# )

In [5]:
# pre_train_surfaces_train, pre_train_surfaces_validation, pre_train_surfaces_test = split_surfaces(
#     pre_train_data,
#     toy_sample=True,
#     max_points=100,
#     max_surfaces=100,
#     random_state=RANDOM_STATE
# )
# fine_tune_surfaces_train, fine_tune_surfaces_validation, fine_tune_surfaces_test = split_surfaces(
#     fine_tune_data,
#     toy_sample=True,
#     max_points=100,
#     max_surfaces=100,
#     random_state=RANDOM_STATE
# )

In [6]:
pre_train_surfaces_train, pre_train_surfaces_validation, pre_train_surfaces_test = split_surfaces(
    pre_train_data,
    toy_sample=True,
    max_points=50,
    max_surfaces=50,
    random_state=RANDOM_STATE
)
fine_tune_surfaces_train, fine_tune_surfaces_validation, fine_tune_surfaces_test = split_surfaces(
    fine_tune_data,
    toy_sample=True,
    max_points=50,
    max_surfaces=50,
    random_state=RANDOM_STATE
)

| Category    | Symbol | Count  | Sector/Type   |
|-------------|--------|--------|---------------|
| Pre-Train   | SPX    | 373,295| Index         |
| Pre-Train   | GLD    | 346,588| Commodity     |
| Pre-Train   | AAPL   | 306,950| Technology    |
| Pre-Train   | TLT    | 161,764| Bond          |
| Pre-Train   | XLE    | 138,507| Energy        |
| Fine-Tune   | SCOR   | 5,300  | Technology    |
| Fine-Tune   | AIN    | 5,288  | Industrial    |
| Fine-Tune   | AZPN   | 5,284  | Software      |
| Fine-Tune   | RWO    | 5,258  | Real Estate   |
| Fine-Tune   | PNNT   | 5,232  | Financials    |


In [7]:
hyperparameters = {
    'Input Preprocessing' : {
        'Mask Proportions' : [0.1, 0.3, 0.5, 0.7],
        'Number of Query Points' : None,
        'Batch Size' : 10
    },
}

In [8]:
pre_train_dataset_train = IVSurfaceDataset(
    pre_train_surfaces_train, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_train = DataLoader(
    pre_train_dataset_train, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
pre_train_dataset_validation = IVSurfaceDataset(
    pre_train_surfaces_validation, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_validation = DataLoader(
    pre_train_dataset_validation, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
pre_train_dataset_test = IVSurfaceDataset(
    pre_train_surfaces_test, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_test = DataLoader(
    pre_train_dataset_test, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)

In [9]:
fine_tune_dataset_train = IVSurfaceDataset(
    fine_tune_surfaces_train, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
fine_tune_data_loader_train = DataLoader(
    fine_tune_dataset_train, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
fine_tune_dataset_validation = IVSurfaceDataset(
    fine_tune_surfaces_validation, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
fine_tune_data_loader_validation = DataLoader(
    fine_tune_dataset_validation, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
fine_tune_dataset_test = IVSurfaceDataset(
    fine_tune_surfaces_test, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
fine_tune_data_loader_test = DataLoader(
    fine_tune_dataset_test, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)

In [10]:
# test_hyperparameters = copy.deepcopy(hyperparameters)
test_hyperparameters = {
    'Surface Embedding' : {
        'Embedding Dimension' : 32,
    },
    'Surface Encoding' : {
        'Number of Heads' : 4,
        'FFN Hidden Dimension' : 128,
        'Attention Dropout' : 0.,
        'Gate Dropout' : 0.,
        'FFN Dropout' : 0.,
        'Number of Blocks' : 4,
        'External Feature Dimension' : 5,
        'Weight Initializer Std.' : 0.02,
        'Linear Bias Initializer' : 0.0,
        'Gate Bias Inititalizer' : 5.0
    },
    'Adaptive Loss Weights' : {
        'Asymmetry' : 1.5,
    },
    'Trainer' : {
        'Pre-Train' : {
            'Number of Epochs' : 20,
            'Warmup Ratio' : 0.15,
            'Peak Learning Rate' : 1e-5,
            'Minimal Learning Rate' : 1e-6,
            'Gradient Clipping' : None,
            'Adam Betas' : (0.9, 0.999),
            'Adam Epsilon' : 1e-8,
            'Adam Weight Decay' : 0.01,
            'Layer-Wise Decay' : None,
        },
        'Fine-Tune' : {
            'Number of Epochs' : 10,
            'Warmup Ratio' : 0.1,
            'Peak Learning Rate' : 1e-2,
            'Minimal Learning Rate' : 1e-6,
            'Gradient Clipping' : None,
            'Adam Betas' : (0.9, 0.999),
            'Adam Epsilon' : 1e-8,
            'Adam Weight Decay' : 0.01,
            'Layer-Wise Decay' : 0.9,
        }
    }
}

In [11]:
import warnings
torch.manual_seed(RANDOM_STATE)

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"

model_pre_train = IvySPT(
    test_hyperparameters['Surface Embedding']['Embedding Dimension'], 
    test_hyperparameters['Surface Encoding']['Number of Blocks'],
    test_hyperparameters['Surface Encoding']['Number of Heads'], 
    test_hyperparameters['Surface Encoding']['FFN Hidden Dimension'],
    test_hyperparameters['Surface Encoding']['Attention Dropout'], 
    test_hyperparameters['Surface Encoding']['Gate Dropout'],
    test_hyperparameters['Surface Encoding']['FFN Dropout'],
    test_hyperparameters['Surface Encoding']['External Feature Dimension'],
    test_hyperparameters['Surface Encoding']['Weight Initializer Std.'],
    test_hyperparameters['Surface Encoding']['Linear Bias Initializer'],
    test_hyperparameters['Surface Encoding']['Gate Bias Inititalizer'],
)
warnings.filterwarnings("ignore", category=UserWarning)
pre_trainer = Trainer(
    model_pre_train,
    pre_train_data_loader_train,
    pre_train_data_loader_validation,
    pre_train_data_loader_test,
    test_hyperparameters['Trainer']['Pre-Train']['Number of Epochs'],
    test_hyperparameters['Trainer']['Pre-Train']['Warmup Ratio'],
    test_hyperparameters['Trainer']['Pre-Train']['Peak Learning Rate'],
    test_hyperparameters['Trainer']['Pre-Train']['Minimal Learning Rate'],
    test_hyperparameters['Trainer']['Pre-Train']['Gradient Clipping'],
    test_hyperparameters['Trainer']['Pre-Train']['Adam Betas'],
    test_hyperparameters['Trainer']['Pre-Train']['Adam Epsilon'],
    test_hyperparameters['Trainer']['Pre-Train']['Adam Weight Decay'],
    test_hyperparameters['Trainer']['Pre-Train']['Layer-Wise Decay'],
    test_hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device,
)
warnings.filterwarnings("ignore", category=UserWarning)
fine_tuner = Trainer(
    model_pre_train,
    fine_tune_data_loader_train,
    fine_tune_data_loader_validation,
    fine_tune_data_loader_test,
    test_hyperparameters['Trainer']['Fine-Tune']['Number of Epochs'],
    test_hyperparameters['Trainer']['Fine-Tune']['Warmup Ratio'],
    test_hyperparameters['Trainer']['Fine-Tune']['Peak Learning Rate'],
    test_hyperparameters['Trainer']['Fine-Tune']['Minimal Learning Rate'],
    test_hyperparameters['Trainer']['Fine-Tune']['Gradient Clipping'],
    test_hyperparameters['Trainer']['Fine-Tune']['Adam Betas'],
    test_hyperparameters['Trainer']['Fine-Tune']['Adam Epsilon'],
    test_hyperparameters['Trainer']['Fine-Tune']['Adam Weight Decay'],
    test_hyperparameters['Trainer']['Fine-Tune']['Layer-Wise Decay'],
    test_hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)

In [12]:
pre_train_loss_coefficients_history, pre_train_train_loss_components_history, pre_train_validate_loss_components_history = pre_trainer.train(experiment_name='test')

Epoch 1/20 - Training Loss: [0.00370402 0.         0.        ], Validation Loss: [3.5611878e-03 3.4837864e-19 0.0000000e+00]
Epoch 2/20 - Training Loss: [0.00388041 0.         0.        ], Validation Loss: [5.079069e-03 4.235155e-20 0.000000e+00]
Epoch 3/20 - Training Loss: [0.00363348 0.         0.        ], Validation Loss: [4.9511744e-03 5.5982925e-20 0.0000000e+00]
Epoch 4/20 - Training Loss: [0.00377841 0.         0.        ], Validation Loss: [4.6288371e-03 3.1831345e-20 0.0000000e+00]
Epoch 5/20 - Training Loss: [0.00359821 0.         0.        ], Validation Loss: [5.2378997e-03 4.1957538e-20 0.0000000e+00]
Epoch 6/20 - Training Loss: [0.00408735 0.         0.        ], Validation Loss: [5.2646231e-03 3.9977804e-20 0.0000000e+00]
Epoch 7/20 - Training Loss: [0.00345339 0.         0.        ], Validation Loss: [3.1058704e-03 6.7368458e-20 0.0000000e+00]
Epoch 8/20 - Training Loss: [0.00394519 0.         0.        ], Validation Loss: [3.1659254e-03 4.2108463e-20 0.0000000e+00]
Epo

KeyboardInterrupt: 