In [None]:
import numpy as np
import pandas as pd
import random
import copy
import torch
from tqdm import tqdm
import warnings
from itertools import product
from torch.utils.data import DataLoader
from ivyspt.input_processing import split_surfaces, IVSurfaceDataset
from ivyspt.trainer import Trainer
from ivyspt.ivyspt import IvySPT

# Notebook-wide constants: the seed shared by every stochastic component,
# and a job count (N_JOBS is only defined here; nothing in this cell reads it).
RANDOM_STATE = 0
N_JOBS = 8

# Seed torch, numpy and the stdlib RNG (in that order) with the same value so
# a fresh-kernel run starts from a known random state.
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(RANDOM_STATE)

In [None]:
def _read_surface_csv(path):
    """Read one CSV using its first two columns as a date-parsed MultiIndex."""
    return pd.read_csv(
        path,
        parse_dates=True,
        index_col=[0, 1],
        date_format="ISO8601",
    )


def _split_toy_surfaces(frame):
    """Call `split_surfaces` on `frame` with the notebook's toy-sample settings.

    Returns whatever `split_surfaces` returns; the callers below unpack it
    into (train, validation, test).
    """
    return split_surfaces(
        frame,
        toy_sample=True,
        max_points=600,
        max_surfaces=60,
        random_state=RANDOM_STATE,
    )


# Load both raw tables first, then split each one with identical settings.
pre_train_data = _read_surface_csv('data/pre_train_data.csv')
fine_tune_data = _read_surface_csv('data/fine_tune_data.csv')

(
    pre_train_surfaces_train,
    pre_train_surfaces_validation,
    pre_train_surfaces_test,
) = _split_toy_surfaces(pre_train_data)

(
    fine_tune_surfaces_train,
    fine_tune_surfaces_validation,
    fine_tune_surfaces_test,
) = _split_toy_surfaces(fine_tune_data)

In [None]:
# Default configuration for the whole notebook, grouped by pipeline stage.
# Downstream cells look these entries up by their exact string keys, so the
# key text (including spelling) must stay as-is.
hyperparameters = {
    # Settings consumed when building the datasets and data loaders.
    'Input Preprocessing': {
        'Mask Proportions': [0.1, 0.3, 0.5, 0.7],
        'Number of Query Points': 1,
        'Batch Size': 50,
    },
    'Surface Embedding': {
        'Embedding Dimension': 32,
    },
    # Model construction arguments.
    'Surface Encoding': {
        'Number of Heads': 4,
        'FFN Hidden Dimension': 128,
        'Attention Dropout': 0.0,
        'Gate Dropout': 0.0,
        'FFN Dropout': 0.0,
        'Number of Blocks': 4,
        'External Feature Dimension': 5,
        'Weight Initializer Std.': 0.02,
        'Linear Bias Initializer': 0.0,
        # NOTE: the key below is misspelt ("Inititalizer") but is read verbatim
        # when the model is built; rename it only together with its readers.
        'Gate Bias Inititalizer': 10.0,
    },
    'Adaptive Loss Weights': {
        'Asymmetry': 1.5,
    },
    # Optimisation settings, one sub-dictionary per training phase.
    'Trainer': {
        'Pre-Train': {
            'Number of Epochs': 20,
            'Warmup Ratio': 0.15,
            'Peak Learning Rate': 1e-3,
            'Minimal Learning Rate': 1e-5,
            # presumably 0 disables clipping -- TODO confirm against Trainer
            'Gradient Clipping': 0,
            'Adam Betas': (0.9, 0.999),
            'Adam Epsilon': 1e-8,
            'Adam Weight Decay': 0.01,
            'Layer-Wise Decay': None,
        },
        'Fine-Tune': {
            'Number of Epochs': 10,
            'Warmup Ratio': 0.1,
            'Peak Learning Rate': 1e-3,
            'Minimal Learning Rate': 1e-6,
            # presumably 0 disables clipping -- TODO confirm against Trainer
            'Gradient Clipping': 0,
            'Adam Betas': (0.9, 0.999),
            'Adam Epsilon': 1e-8,
            'Adam Weight Decay': 0.01,
            'Layer-Wise Decay': 0.9,
        },
    },
}

In [None]:
def _build_pre_train_dataset_and_loader(surfaces):
    """Wrap one surface split in an `IVSurfaceDataset` and a `DataLoader`.

    The dataset receives the configured mask proportions, the notebook seed
    and the configured number of query points; the loader uses the configured
    batch size, shuffling, no worker processes and the dataset's own
    `collate_fn`.

    Returns:
        A `(dataset, loader)` tuple.
    """
    preprocessing = hyperparameters['Input Preprocessing']
    dataset = IVSurfaceDataset(
        surfaces,
        preprocessing['Mask Proportions'],
        RANDOM_STATE,
        preprocessing['Number of Query Points'],
    )
    loader = DataLoader(
        dataset,
        batch_size=preprocessing['Batch Size'],
        # NOTE(review): shuffling is applied to every split, including
        # validation and test -- confirm this is intentional.
        shuffle=True,
        num_workers=0,
        collate_fn=IVSurfaceDataset.collate_fn,
    )
    return dataset, loader


# Build the three pre-training splits in train -> validation -> test order.
pre_train_dataset_train, pre_train_data_loader_train = (
    _build_pre_train_dataset_and_loader(pre_train_surfaces_train)
)
pre_train_dataset_validation, pre_train_data_loader_validation = (
    _build_pre_train_dataset_and_loader(pre_train_surfaces_validation)
)
pre_train_dataset_test, pre_train_data_loader_test = (
    _build_pre_train_dataset_and_loader(pre_train_surfaces_test)
)

In [None]:
import torch
import copy
import warnings
import pandas as pd
from itertools import product
from tqdm import tqdm

# Grid search over embedding dimension, number of encoder blocks and peak
# pre-training learning rate. Each configuration trains a fresh model on the
# pre-training loaders and records the final-epoch validation loss components.

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Candidate values for each searched hyperparameter.
embedding_dims = [4, 8, 16, 32]
num_blocks = [1, 2, 4, 8]
peak_learning_rates = [1e-4, 1e-3, 1e-2]

# One record (dict) per evaluated configuration.
results = []

# Every (embedding dimension, number of blocks, peak learning rate) triple.
combinations = list(product(embedding_dims, num_blocks, peak_learning_rates))

# Silence UserWarnings once, before the loop, rather than re-registering the
# same filter on every iteration. Note this filter is process-wide and stays
# active for the rest of the kernel session.
warnings.filterwarnings("ignore", category=UserWarning)

for embedding_dim, blocks, peak_lr in tqdm(combinations, desc="Grid search"):
    # Work on a private copy so the default configuration is never mutated.
    test_hyperparameters = copy.deepcopy(hyperparameters)
    embedding_cfg = test_hyperparameters['Surface Embedding']
    encoding_cfg = test_hyperparameters['Surface Encoding']
    pre_train_cfg = test_hyperparameters['Trainer']['Pre-Train']

    # Apply this configuration; the FFN width is tied to 4x the embedding size.
    embedding_cfg['Embedding Dimension'] = embedding_dim
    encoding_cfg['Number of Blocks'] = blocks
    encoding_cfg['FFN Hidden Dimension'] = 4 * embedding_dim
    pre_train_cfg['Peak Learning Rate'] = peak_lr

    # Reset every global RNG the notebook seeds (torch, numpy, stdlib) so each
    # configuration starts from the same random state and runs are comparable.
    torch.manual_seed(RANDOM_STATE)
    np.random.seed(RANDOM_STATE)
    random.seed(RANDOM_STATE)

    # Build a fresh model for this configuration (arguments are positional).
    model_pre_train = IvySPT(
        embedding_cfg['Embedding Dimension'],
        encoding_cfg['Number of Blocks'],
        encoding_cfg['Number of Heads'],
        encoding_cfg['FFN Hidden Dimension'],
        encoding_cfg['Attention Dropout'],
        encoding_cfg['Gate Dropout'],
        encoding_cfg['FFN Dropout'],
        encoding_cfg['External Feature Dimension'],
        encoding_cfg['Weight Initializer Std.'],
        encoding_cfg['Linear Bias Initializer'],
        encoding_cfg['Gate Bias Inititalizer'],
    )

    # Build the trainer on the shared pre-training loaders.
    pre_trainer = Trainer(
        model_pre_train,
        pre_train_data_loader_train,
        pre_train_data_loader_validation,
        pre_train_data_loader_test,
        pre_train_cfg['Number of Epochs'],
        pre_train_cfg['Warmup Ratio'],
        pre_train_cfg['Peak Learning Rate'],
        pre_train_cfg['Minimal Learning Rate'],
        pre_train_cfg['Gradient Clipping'],
        pre_train_cfg['Adam Betas'],
        pre_train_cfg['Adam Epsilon'],
        pre_train_cfg['Adam Weight Decay'],
        pre_train_cfg['Layer-Wise Decay'],
        test_hyperparameters['Adaptive Loss Weights']['Asymmetry'],
        device,
    )

    # train() returns three values; only the third (the per-epoch validation
    # loss components) is used here.
    _, _, validate_loss_components_history = pre_trainer.train()

    # Keep the components from the last recorded epoch.
    final_validation_losses = validate_loss_components_history[-1]

    # Components are read positionally: 0 = MSE, 1 = calendar, 2 = butterfly.
    results.append({
        'Embedding Dimension': embedding_dim,
        'Number of Blocks': blocks,
        'Peak Learning Rate': peak_lr,
        'MSE Loss': final_validation_losses[0],
        'Calendar Arbitrage Loss': final_validation_losses[1],
        'Butterfly Arbitrage Loss': final_validation_losses[2],
    })

# Tabulate the grid-search records, indexed by the searched hyperparameters.
# Built in one expression (no in-place mutation) so re-running is predictable.
results_df = pd.DataFrame(results).set_index(
    ['Embedding Dimension', 'Number of Blocks', 'Peak Learning Rate']
)

# Rank each loss column independently; the lowest loss gets rank 1 and ties
# share the smallest rank. NaN losses (e.g. from a diverged run) are ranked
# last instead of being left as NaN, because a NaN rank would be skipped by the
# row mean below and could make a failed configuration look artificially good.
ranked_df = results_df.rank(
    axis=0, method='min', ascending=True, na_option='bottom'
)

# Average the per-loss ranks for each configuration and list the best first.
ranked_df = (
    ranked_df
    .assign(**{'Average Rank': ranked_df.mean(axis=1)})
    .sort_values(by='Average Rank', ascending=True)
)

ranked_df