In [1]:
import numpy as np
import pandas as pd
import random
import copy
import torch
from tqdm import tqdm
import warnings
from itertools import product
from torch.utils.data import DataLoader
from ivyspt.input_processing import split_surfaces, IVSurfaceDataset
from ivyspt.trainer import Trainer
from ivyspt.ivyspt import IvySPT

# Set the random seed for reproducibility
RANDOM_STATE = 0
N_JOBS = 8
torch.manual_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)

In [2]:
pre_train_data = pd.read_csv('data/pre_train_data.csv', parse_dates=True, index_col=[0, 1], date_format="ISO8601")
fine_tune_data = pd.read_csv('data/fine_tune_data.csv', parse_dates=True, index_col=[0, 1], date_format="ISO8601")
pre_train_surfaces_train, pre_train_surfaces_validation, pre_train_surfaces_test = split_surfaces(
    pre_train_data,
    toy_sample=True,
    max_points=20,
    max_surfaces=5,
    random_state=RANDOM_STATE
)
fine_tune_surfaces_train, fine_tune_surfaces_validation, fine_tune_surfaces_test = split_surfaces(
    fine_tune_data,
    toy_sample=True,
    max_points=50,
    max_surfaces=100,
    random_state=RANDOM_STATE
)

In [3]:
hyperparameters = {
    'Input Preprocessing' : {
        'Mask Proportions' : [0.1, 0.3, 0.5, 0.7],
        'Number of Query Points' : 1,
        'Batch Size' : 50
    },
    'Surface Embedding' : {
        'Embedding Dimension' : 32,
    },
    'Surface Encoding' : {
        'Number of Heads' : 4,
        'FFN Hidden Dimension' : 128,
        'Attention Dropout' : 0.,
        'Gate Dropout' : 0.,
        'FFN Dropout' : 0.,
        'Number of Blocks' : 4,
        'External Feature Dimension' : 5,
        'Weight Initializer Std.' : 0.02,
        'Linear Bias Initializer' : 0.0,
        'Gate Bias Inititalizer' : 10.0
    },
    'Adaptive Loss Weights' : {
        'Asymmetry' : 1.5,
    },
    'Trainer' : {
        'Pre-Train' : {
            'Number of Epochs' : 20,
            'Warmup Ratio' : 0.15,
            'Peak Learning Rate' : 1e-3,
            'Minimal Learning Rate' : 1e-5,
            'Gradient Clipping' : 0,
            'Adam Betas' : (0.9, 0.999),
            'Adam Epsilon' : 1e-8,
            'Adam Weight Decay' : 0.01,
            'Layer-Wise Decay' : None,
        },
        'Fine-Tune' : {
            'Number of Epochs' : 10,
            'Warmup Ratio' : 0.1,
            'Peak Learning Rate' : 1e-3,
            'Minimal Learning Rate' : 1e-6,
            'Gradient Clipping' : 0,
            'Adam Betas' : (0.9, 0.999),
            'Adam Epsilon' : 1e-8,
            'Adam Weight Decay' : 0.01,
            'Layer-Wise Decay' : 0.9,
        }
    }
}

In [4]:
pre_train_dataset_train = IVSurfaceDataset(
    pre_train_surfaces_train, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_train = DataLoader(
    pre_train_dataset_train, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
pre_train_dataset_validation = IVSurfaceDataset(
    pre_train_surfaces_validation, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_validation = DataLoader(
    pre_train_dataset_validation, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
pre_train_dataset_test = IVSurfaceDataset(
    pre_train_surfaces_test, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_test = DataLoader(
    pre_train_dataset_test, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)

In [5]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"

# Define the grid of hyperparameters
embedding_dims = [4, 8, 16, 32]
num_blocks = [1, 2, 4, 8]

# Create empty DataFrames for each loss component
mse_loss_df = pd.DataFrame(index=num_blocks, columns=embedding_dims)
calendar_loss_df = pd.DataFrame(index=num_blocks, columns=embedding_dims)
butterfly_loss_df = pd.DataFrame(index=num_blocks, columns=embedding_dims)

# Generate all combinations of embedding dimensions and number of blocks
combinations = product(embedding_dims, num_blocks)

# Iterate over each combination of embedding dimension and number of blocks
for embedding_dim, blocks in tqdm(combinations, total=len(embedding_dims) * len(num_blocks)):
    # Deep copy the default hyperparameters to test specific combinations
    test_hyperparameters = copy.deepcopy(hyperparameters)
    
    # Set the specific hyperparameters
    test_hyperparameters['Surface Embedding']['Embedding Dimension'] = embedding_dim
    test_hyperparameters['Surface Encoding']['Number of Blocks'] = blocks
    test_hyperparameters['Surface Encoding']['FFN Hidden Dimension'] = 4 * embedding_dim
    
    # Initialize the model with the test hyperparameters
    torch.manual_seed(RANDOM_STATE)
    model_pre_train = IvySPT(
        test_hyperparameters['Surface Embedding']['Embedding Dimension'], 
        test_hyperparameters['Surface Encoding']['Number of Blocks'],
        test_hyperparameters['Surface Encoding']['Number of Heads'], 
        test_hyperparameters['Surface Encoding']['FFN Hidden Dimension'],
        test_hyperparameters['Surface Encoding']['Attention Dropout'], 
        test_hyperparameters['Surface Encoding']['Gate Dropout'],
        test_hyperparameters['Surface Encoding']['FFN Dropout'],
        test_hyperparameters['Surface Encoding']['External Feature Dimension'],
        test_hyperparameters['Surface Encoding']['Weight Initializer Std.'],
        test_hyperparameters['Surface Encoding']['Linear Bias Initializer'],
        test_hyperparameters['Surface Encoding']['Gate Bias Inititalizer']
    )
    
    # Initialize the trainer with the test hyperparameters
    warnings.filterwarnings("ignore", category=UserWarning)        
    pre_trainer = Trainer(
        model_pre_train,
        pre_train_data_loader_train,
        pre_train_data_loader_validation,
        pre_train_data_loader_test,
        test_hyperparameters['Trainer']['Pre-Train']['Number of Epochs'],
        test_hyperparameters['Trainer']['Pre-Train']['Warmup Ratio'],
        test_hyperparameters['Trainer']['Pre-Train']['Peak Learning Rate'],
        test_hyperparameters['Trainer']['Pre-Train']['Minimal Learning Rate'],
        test_hyperparameters['Trainer']['Pre-Train']['Gradient Clipping'],
        test_hyperparameters['Trainer']['Pre-Train']['Adam Betas'],
        test_hyperparameters['Trainer']['Pre-Train']['Adam Epsilon'],
        test_hyperparameters['Trainer']['Pre-Train']['Adam Weight Decay'],
        test_hyperparameters['Trainer']['Pre-Train']['Layer-Wise Decay'],
        test_hyperparameters['Adaptive Loss Weights']['Asymmetry'],
        device
    )
    
    # Train the model and get the validation loss components
    _, _, validate_loss_components_history = pre_trainer.train()
    
    # Get the final validation losses for each component
    final_validation_losses = validate_loss_components_history[-1]
    
    # Update the DataFrames with the final validation losses
    mse_loss_df.loc[blocks, embedding_dim] = final_validation_losses[0]
    calendar_loss_df.loc[blocks, embedding_dim] = final_validation_losses[1]
    butterfly_loss_df.loc[blocks, embedding_dim] = final_validation_losses[2]

# Display the DataFrames
print("MSE Loss DataFrame:")
print(mse_loss_df)
print("\nCalendar Arbitrage Loss DataFrame:")
print(calendar_loss_df)
print("\nButterfly Arbitrage Loss DataFrame:")
print(butterfly_loss_df)

100%|██████████| 16/16 [13:06<00:00, 49.13s/it]

MSE Loss DataFrame:
         4         8         16        32
1  0.010359  0.001771  0.022034  0.015224
2  0.000324  0.002352  0.001712  0.043932
4  0.009136  0.002454  0.006367  0.051019
8  0.000959  0.019971  0.009072  0.025705

Calendar Arbitrage Loss DataFrame:
         4         8         16        32
1  0.000507   0.00005  0.000014       0.0
2  0.000308  0.000544  0.003353  0.000086
4  0.000006  0.000017  0.000011  0.000155
8       0.0  0.000004  0.000244   0.00067

Butterfly Arbitrage Loss DataFrame:
    4    8    16        32
1  0.0  0.0  0.0       0.0
2  0.0  0.0  0.0  0.006353
4  0.0  0.0  0.0  1.308052
8  0.0  0.0  0.0       0.0





In [6]:
mse_loss_df.stack().reset_index()

Unnamed: 0,level_0,level_1,0
0,1,4,0.010359
1,1,8,0.001771
2,1,16,0.022034
3,1,32,0.015224
4,2,4,0.000324
5,2,8,0.002352
6,2,16,0.001712
7,2,32,0.043932
8,4,4,0.009136
9,4,8,0.002454


In [7]:
# Melt and fix the index names for mse_df_melted
mse_df_melted = mse_loss_df.stack().reset_index().set_index(['level_0', 'level_1'])
mse_df_melted.index.names = ['Number of Blocks', 'Embedding Dimension']
mse_df_melted.columns = ['MSE Loss']

# Melt and fix the index names for calendar_arbitrage_df_melted
calendar_arbitrage_df_melted = calendar_loss_df.stack().reset_index().set_index(['level_0', 'level_1'])
calendar_arbitrage_df_melted.index.names = ['Number of Blocks', 'Embedding Dimension']
calendar_arbitrage_df_melted.columns = ['Calendar Arbitrage Loss']

# Melt and fix the index names for butterfly_arbitrage_df_melted
butterfly_arbitrage_df_melted = butterfly_loss_df.stack().reset_index().set_index(['level_0', 'level_1'])
butterfly_arbitrage_df_melted.index.names = ['Number of Blocks', 'Embedding Dimension']
butterfly_arbitrage_df_melted.columns = ['Butterfly Arbitrage Loss']

combined_df = mse_df_melted.join(calendar_arbitrage_df_melted).join(butterfly_arbitrage_df_melted)
ranked_df = combined_df.rank(axis=0, method='min')
ranked_df['Average Rank'] = ranked_df.mean(axis=1)
sorted_ranked_df = ranked_df.sort_values(by='Average Rank', ascending=True)
sorted_ranked_df

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE Loss,Calendar Arbitrage Loss,Butterfly Arbitrage Loss,Average Rank
Number of Blocks,Embedding Dimension,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8,4,2.0,2.0,1.0,1.666667
1,8,4.0,8.0,1.0,4.333333
1,32,11.0,1.0,1.0,4.333333
4,16,7.0,5.0,1.0,4.333333
2,4,1.0,12.0,1.0,4.666667
4,4,9.0,4.0,1.0,4.666667
4,8,6.0,7.0,1.0,4.666667
8,8,12.0,3.0,1.0,5.333333
1,16,13.0,6.0,1.0,6.666667
2,8,5.0,14.0,1.0,6.666667


In [8]:
combined_df.sort_values('MSE Loss')

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE Loss,Calendar Arbitrage Loss,Butterfly Arbitrage Loss
Number of Blocks,Embedding Dimension,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,4,0.000324,0.000308,0.0
8,4,0.000959,0.0,0.0
2,16,0.001712,0.003353,0.0
1,8,0.001771,5e-05,0.0
2,8,0.002352,0.000544,0.0
4,8,0.002454,1.7e-05,0.0
4,16,0.006367,1.1e-05,0.0
8,16,0.009072,0.000244,0.0
4,4,0.009136,6e-06,0.0
1,4,0.010359,0.000507,0.0


In [9]:
mse_df_melted

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE Loss
Number of Blocks,Embedding Dimension,Unnamed: 2_level_1
1,4,0.010359
1,8,0.001771
1,16,0.022034
1,32,0.015224
2,4,0.000324
2,8,0.002352
2,16,0.001712
2,32,0.043932
4,4,0.009136
4,8,0.002454
