In [1]:
import numpy as np
import pandas as pd
import random
import copy
import torch
from torch.utils.data import DataLoader
from ivyspt.input_processing import split_surfaces, IVSurfaceDataset
from ivyspt.trainer import Trainer
from ivyspt.ivyspt import IvySPT
import warnings

# Set the random seed for reproducibility
RANDOM_STATE = 0
N_JOBS = 8
torch.manual_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)

In [2]:
pre_train_data = pd.read_csv('data/pre_train_data.csv', parse_dates=True, index_col=[0, 1], date_format="ISO8601")
fine_tune_data = pd.read_csv('data/fine_tune_data.csv', parse_dates=True, index_col=[0, 1], date_format="ISO8601")
pre_train_surfaces_train, pre_train_surfaces_validation, pre_train_surfaces_test = split_surfaces(
    pre_train_data,
    toy_sample=True,
    max_points=20,
    max_surfaces=5,
    random_state=RANDOM_STATE
)
fine_tune_surfaces_train, fine_tune_surfaces_validation, fine_tune_surfaces_test = split_surfaces(
    fine_tune_data,
    toy_sample=True,
    max_points=20,
    max_surfaces=5,
    random_state=RANDOM_STATE
)

In [3]:
hyperparameters = {
    'Input Preprocessing' : {
        'Mask Proportions' : [0.1, 0.3, 0.5, 0.7],
        'Number of Query Points' : 1,
        'Batch Size' : 5
    },
    'Surface Embedding' : {
        'Embedding Dimension' : 32,
    },
    'Surface Encoding' : {
        'Number of Heads' : 4,
        'FFN Hidden Dimension' : 128,
        'Attention Dropout' : 0.,
        'Gate Dropout' : 0.,
        'FFN Dropout' : 0.,
        'Number of Blocks' : 4,
        'External Feature Dimension' : 5,
        'Weight Initializer Std.' : 0.02,
        'Linear Bias Initializer' : 0.0,
        'Gate Bias Inititalizer' : 10.0
    },
    'Adaptive Loss Weights' : {
        'Asymmetry' : 1.5,
    },
    'Trainer' : {
        'Pre-Train' : {
            'Number of Epochs' : 20,
            'Warmup Ratio' : 0.15,
            'Peak Learning Rate' : 1e-3,
            'Minimal Learning Rate' : 1e-5,
            'Gradient Clipping' : 1,
            'Adam Betas' : (0.9, 0.999),
            'Adam Epsilon' : 1e-8,
            'Adam Weight Decay' : 0.01,
            'Layer-Wise Decay' : None,
        },
        'Fine-Tune' : {
            'Number of Epochs' : 10,
            'Warmup Ratio' : 0.1,
            'Peak Learning Rate' : 1e-3,
            'Minimal Learning Rate' : 1e-6,
            'Gradient Clipping' : 0,
            'Adam Betas' : (0.9, 0.999),
            'Adam Epsilon' : 1e-8,
            'Adam Weight Decay' : 0.01,
            'Layer-Wise Decay' : 0.9,
        }
    }
}

In [4]:
pre_train_dataset_train = IVSurfaceDataset(
    pre_train_surfaces_train, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_train = DataLoader(
    pre_train_dataset_train, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
pre_train_dataset_validation = IVSurfaceDataset(
    pre_train_surfaces_validation, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_validation = DataLoader(
    pre_train_dataset_validation, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
pre_train_dataset_test = IVSurfaceDataset(
    pre_train_surfaces_test, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
pre_train_data_loader_test = DataLoader(
    pre_train_dataset_test, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)

In [5]:
fine_tune_dataset_train = IVSurfaceDataset(
    fine_tune_surfaces_train, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
fine_tune_data_loader_train = DataLoader(
    fine_tune_dataset_train, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
fine_tune_dataset_validation = IVSurfaceDataset(
    fine_tune_surfaces_validation, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
fine_tune_data_loader_validation = DataLoader(
    fine_tune_dataset_validation, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)
fine_tune_dataset_test = IVSurfaceDataset(
    fine_tune_surfaces_test, 
    hyperparameters['Input Preprocessing']['Mask Proportions'], 
    RANDOM_STATE, 
    hyperparameters['Input Preprocessing']['Number of Query Points'] 
)
fine_tune_data_loader_test = DataLoader(
    fine_tune_dataset_test, 
    batch_size=hyperparameters['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)

In [6]:
import time
import torch
import tracemalloc

def measure_time_memory(func, device, *args, **kwargs):
    start_time = time.time()
    tracemalloc.start()

    # Measure the peak memory usage
    if device == "cuda":
        torch.cuda.reset_peak_memory_stats()

    result = func(*args, **kwargs)

    elapsed_time = time.time() - start_time
    if device == "cuda":
        max_memory = torch.cuda.max_memory_allocated() / (1024 ** 2)  # Convert to MB
    else:
        _, peak = tracemalloc.get_traced_memory()
        max_memory = peak / (1024 ** 2)  # Convert to MB

    tracemalloc.stop()

    print(f"Time: {elapsed_time:.2f} seconds")
    print(f"Max Memory: {max_memory:.2f} MB")

    return result

In [7]:
import subprocess
import torch

def get_cpu_info():
    # Run the lscpu command
    result = subprocess.run(['lscpu'], stdout=subprocess.PIPE)
    # Decode the output from bytes to string
    lscpu_output = result.stdout.decode('utf-8')
    
    # Parse the lscpu output
    cpu_info = {}
    for line in lscpu_output.split('\n'):
        if line.strip():
            parts = line.split(':', 1)
            if len(parts) == 2:
                key, value = parts
                cpu_info[key.strip()] = value.strip()

    # Extract useful information
    useful_info = {
        "Model name": cpu_info.get("Model name"),
    }

    return useful_info

def format_cpu_info(cpu_info):
    report = (
        f"Model Name: {cpu_info['Model name']}\n"
    )
    return report

def print_device_info():
    # Print CPU info
    cpu_info = get_cpu_info()
    print("CPU Information:")
    print(format_cpu_info(cpu_info))

    # Check if CUDA (GPU) is available
    if torch.cuda.is_available():
        print("\nGPU Information:")
        print(f"Number of GPUs available: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    else:
        print("\nNo GPU available. Running on CPU.")

# Call the function
print_device_info()

CPU Information:
Model Name: Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz


GPU Information:
Number of GPUs available: 1
GPU 0: NVIDIA GeForce RTX 2060 SUPER


In [8]:
def save_training_results(
    name, 
    loss_coefficients_history, 
    train_loss_components_history, 
    validate_loss_components_history, 
    test_loss_records
):
    # Convert numpy arrays to DataFrames
    loss_coefficients_df = pd.DataFrame(loss_coefficients_history, columns=['MSE Coeff', 'Calendar Coeff', 'Butterfly Coeff'])
    train_loss_components_df = pd.DataFrame(train_loss_components_history, columns=['Train MSE Loss', 'Train Calendar Loss', 'Train Butterfly Loss'])
    validate_loss_components_df = pd.DataFrame(validate_loss_components_history, columns=['Validate MSE Loss', 'Validate Calendar Loss', 'Validate Butterfly Loss'])

    # Combine into one DataFrame
    combined_history_df = pd.concat([loss_coefficients_df, train_loss_components_df, validate_loss_components_df], axis=1)

    # Save the training history DataFrame
    combined_history_df.to_csv(f'tests/{name}_train_history.csv', index=False)

    # Save the test loss records
    test_loss_records.to_csv(f'tests/{name}_test_loss_records.csv')


# Default Run

In [9]:
torch.manual_seed(RANDOM_STATE)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
print(f"Using device: {device}")
model_pre_train = IvySPT(
    hyperparameters['Surface Embedding']['Embedding Dimension'], 
    hyperparameters['Surface Encoding']['Number of Blocks'],
    hyperparameters['Surface Encoding']['Number of Heads'], 
    hyperparameters['Surface Encoding']['FFN Hidden Dimension'],
    hyperparameters['Surface Encoding']['Attention Dropout'], 
    hyperparameters['Surface Encoding']['Gate Dropout'],
    hyperparameters['Surface Encoding']['FFN Dropout'],
    hyperparameters['Surface Encoding']['External Feature Dimension'],
    hyperparameters['Surface Encoding']['Weight Initializer Std.'],
    hyperparameters['Surface Encoding']['Linear Bias Initializer'],
    hyperparameters['Surface Encoding']['Gate Bias Inititalizer']
)
warnings.filterwarnings("ignore", category=UserWarning)  
pre_trainer = Trainer(
    model_pre_train,
    pre_train_data_loader_train,
    pre_train_data_loader_validation,
    pre_train_data_loader_test,
    hyperparameters['Trainer']['Pre-Train']['Number of Epochs'],
    hyperparameters['Trainer']['Pre-Train']['Warmup Ratio'],
    hyperparameters['Trainer']['Pre-Train']['Peak Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Gradient Clipping'],
    hyperparameters['Trainer']['Pre-Train']['Adam Betas'],
    hyperparameters['Trainer']['Pre-Train']['Adam Epsilon'],
    hyperparameters['Trainer']['Pre-Train']['Adam Weight Decay'],
    hyperparameters['Trainer']['Pre-Train']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)
# Pre-Training
print("\nPre-Training:")
pre_train_loss_coefficients_history, pre_train_train_loss_components_history, pre_train_validate_loss_components_history = \
    measure_time_memory(
        pre_trainer.train, 
        device, 
        # experiment_name="pre_train_experiment"
    )

# Test Pre-Trained Model on Pre-Train Dataset
print("\nTesting Pre-Trained Model on Pre-Train Dataset:")
pre_train_test_loss_components, pre_train_test_loss_records = measure_time_memory(pre_trainer.test, device)
print(f"Pre-Train Test Loss Components: {pre_train_test_loss_components}")

save_training_results(
    'pre_train',
    pre_train_loss_coefficients_history, 
    pre_train_train_loss_components_history, 
    pre_train_validate_loss_components_history, 
    pre_train_test_loss_records
)

# Test Pre-Trained Model on Fine-Tune Dataset
print("\nTesting Pre-Trained Model on Fine-Tune Dataset:")
warnings.filterwarnings("ignore", category=UserWarning)
pre_train_trainer_fine_test = Trainer(
    model_pre_train,  # Use the same pre-trained model
    fine_tune_data_loader_train,
    fine_tune_data_loader_validation,
    fine_tune_data_loader_test,
    hyperparameters['Trainer']['Fine-Tune']['Number of Epochs'],
    hyperparameters['Trainer']['Fine-Tune']['Warmup Ratio'],
    hyperparameters['Trainer']['Fine-Tune']['Peak Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Gradient Clipping'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Betas'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Epsilon'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Weight Decay'],
    hyperparameters['Trainer']['Fine-Tune']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)

pre_train_fine_test_loss_components, pre_train_fine_test_loss_records = measure_time_memory(pre_train_trainer_fine_test.test, device)
print(f"Pre-Train Fine-Tune Dataset Test Loss Components: {pre_train_fine_test_loss_components}")

# Fine-Tuning
print("\nFine-Tuning:")

# Create a copy of the pre-trained model
model_fine_tune = copy.deepcopy(model_pre_train)

warnings.filterwarnings("ignore", category=UserWarning)  
fine_tuner = Trainer(
    model_fine_tune,
    fine_tune_data_loader_train,
    fine_tune_data_loader_validation,
    fine_tune_data_loader_test,
    hyperparameters['Trainer']['Fine-Tune']['Number of Epochs'],
    hyperparameters['Trainer']['Fine-Tune']['Warmup Ratio'],
    hyperparameters['Trainer']['Fine-Tune']['Peak Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Gradient Clipping'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Betas'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Epsilon'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Weight Decay'],
    hyperparameters['Trainer']['Fine-Tune']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)

fine_tune_loss_coefficients_history, fine_tune_train_loss_components_history, fine_tune_validate_loss_components_history = measure_time_memory(
    fine_tuner.train, 
    device, 
    # experiment_name="fine_tune_experiment"
)

# Test Fine-Tuned Model
print("\nTesting Fine-Tuned Model:")
fine_tune_test_loss_components, fine_tune_test_loss_records = measure_time_memory(fine_tuner.test, device)
print(f"Fine-Tune Test Loss Components: {fine_tune_test_loss_components}")
save_training_results(
    'pre_train',
    fine_tune_loss_coefficients_history, 
    fine_tune_train_loss_components_history, 
    fine_tune_validate_loss_components_history, 
    fine_tune_test_loss_records
)

# Save the pre-trained model
torch.save(model_pre_train.state_dict(), 'models/model_pre_train.pth')
# Save the fine-tuned model
torch.save(model_fine_tune.state_dict(), 'models/model_fine_tune.pth')

Using device: cpu

Pre-Training:
10000000000
Time: 82.99 seconds
Max Memory: 0.76 MB

Testing Pre-Trained Model on Pre-Train Dataset:
Time: 0.47 seconds
Max Memory: 0.29 MB
Pre-Train Test Loss Components: tensor([0.0364, 0.0051, 0.0000])

Testing Pre-Trained Model on Fine-Tune Dataset:
Time: 0.46 seconds
Max Memory: 0.26 MB
Pre-Train Fine-Tune Dataset Test Loss Components: tensor([2.5841e-02, 5.9880e-03, 1.3396e+01])

Fine-Tuning:
10000000000
Time: 39.77 seconds
Max Memory: 0.41 MB

Testing Fine-Tuned Model:
Time: 0.48 seconds
Max Memory: 0.27 MB
Fine-Tune Test Loss Components: tensor([0.0304, 0.0144, 0.0000])


# Removing the Continuous Kernel

In [10]:
torch.manual_seed(RANDOM_STATE)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
print(f"Using device: {device}")
remove_kernel_model_pre_train = IvySPT(
    hyperparameters['Surface Embedding']['Embedding Dimension'], 
    hyperparameters['Surface Encoding']['Number of Blocks'],
    hyperparameters['Surface Encoding']['Number of Heads'], 
    hyperparameters['Surface Encoding']['FFN Hidden Dimension'],
    hyperparameters['Surface Encoding']['Attention Dropout'], 
    hyperparameters['Surface Encoding']['Gate Dropout'],
    hyperparameters['Surface Encoding']['FFN Dropout'],
    hyperparameters['Surface Encoding']['External Feature Dimension'],
    hyperparameters['Surface Encoding']['Weight Initializer Std.'],
    hyperparameters['Surface Encoding']['Linear Bias Initializer'],
    hyperparameters['Surface Encoding']['Gate Bias Inititalizer'],
    remove_kernel=True,
)
warnings.filterwarnings("ignore", category=UserWarning)  
remove_kernel_pre_trainer = Trainer(
    remove_kernel_model_pre_train,
    pre_train_data_loader_train,
    pre_train_data_loader_validation,
    pre_train_data_loader_test,
    hyperparameters['Trainer']['Pre-Train']['Number of Epochs'],
    hyperparameters['Trainer']['Pre-Train']['Warmup Ratio'],
    hyperparameters['Trainer']['Pre-Train']['Peak Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Gradient Clipping'],
    hyperparameters['Trainer']['Pre-Train']['Adam Betas'],
    hyperparameters['Trainer']['Pre-Train']['Adam Epsilon'],
    hyperparameters['Trainer']['Pre-Train']['Adam Weight Decay'],
    hyperparameters['Trainer']['Pre-Train']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)
# Pre-Training
print("\nPre-Training:")
remove_kernel_pre_train_loss_coefficients_history, remove_kernel_pre_train_train_loss_components_history, remove_kernel_pre_train_validate_loss_components_history = \
    measure_time_memory(
        remove_kernel_pre_trainer.train, 
        device, 
        # experiment_name="pre_train_experiment"
    )

# Test Pre-Trained Model
print("\nTesting Pre-Trained Model:")
remove_kernel_pre_train_test_loss_components, remove_kernel_pre_train_test_loss_records = measure_time_memory(remove_kernel_pre_trainer.test, device)
print(f"Pre-Train Test Loss Components: {remove_kernel_pre_train_test_loss_components}")

warnings.filterwarnings("ignore", category=UserWarning)  
remove_kernel_fine_tuner = Trainer(
    remove_kernel_model_pre_train,
    fine_tune_data_loader_train,
    fine_tune_data_loader_validation,
    fine_tune_data_loader_test,
    hyperparameters['Trainer']['Fine-Tune']['Number of Epochs'],
    hyperparameters['Trainer']['Fine-Tune']['Warmup Ratio'],
    hyperparameters['Trainer']['Fine-Tune']['Peak Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Gradient Clipping'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Betas'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Epsilon'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Weight Decay'],
    hyperparameters['Trainer']['Fine-Tune']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)

# Fine-Tuning
print("\nFine-Tuning:")
remove_kernel_fine_tune_loss_coefficients_history, remove_kernel_fine_tune_train_loss_components_history, remove_kernel_fine_tune_validate_loss_components_history = measure_time_memory(
    remove_kernel_fine_tuner.train, 
    device, 
    # experiment_name="fine_tune_experiment"
)

# Test Fine-Tuned Model
print("\nTesting Fine-Tuned Model:")
remove_kernel_fine_tune_test_loss_components, remove_kernel_fine_tune_test_loss_records = measure_time_memory(remove_kernel_fine_tuner.test, device)
print(f"Fine-Tune Test Loss Components: {remove_kernel_fine_tune_test_loss_components}")

Using device: cpu

Pre-Training:
Time: 81.71 seconds
Max Memory: 0.44 MB

Testing Pre-Trained Model:
Time: 0.49 seconds
Max Memory: 0.27 MB
Pre-Train Test Loss Components: tensor([0.0066, 0.0012, 0.0000])

Fine-Tuning:
Time: 40.12 seconds
Max Memory: 0.40 MB

Testing Fine-Tuned Model:
Time: 0.47 seconds
Max Memory: 0.27 MB
Fine-Tune Test Loss Components: tensor([0.0057, 0.0023, 0.1761])


# Removing the Positional Embedding

In [11]:
torch.manual_seed(RANDOM_STATE)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
print(f"Using device: {device}")
remove_pe_model_pre_train = IvySPT(
    hyperparameters['Surface Embedding']['Embedding Dimension'], 
    hyperparameters['Surface Encoding']['Number of Blocks'],
    hyperparameters['Surface Encoding']['Number of Heads'], 
    hyperparameters['Surface Encoding']['FFN Hidden Dimension'],
    hyperparameters['Surface Encoding']['Attention Dropout'], 
    hyperparameters['Surface Encoding']['Gate Dropout'],
    hyperparameters['Surface Encoding']['FFN Dropout'],
    hyperparameters['Surface Encoding']['External Feature Dimension'],
    hyperparameters['Surface Encoding']['Weight Initializer Std.'],
    hyperparameters['Surface Encoding']['Linear Bias Initializer'],
    hyperparameters['Surface Encoding']['Gate Bias Inititalizer'],
    remove_positional_embedding=True,
)
warnings.filterwarnings("ignore", category=UserWarning)  
remove_pe_pre_trainer = Trainer(
    remove_pe_model_pre_train,
    pre_train_data_loader_train,
    pre_train_data_loader_validation,
    pre_train_data_loader_test,
    hyperparameters['Trainer']['Pre-Train']['Number of Epochs'],
    hyperparameters['Trainer']['Pre-Train']['Warmup Ratio'],
    hyperparameters['Trainer']['Pre-Train']['Peak Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Gradient Clipping'],
    hyperparameters['Trainer']['Pre-Train']['Adam Betas'],
    hyperparameters['Trainer']['Pre-Train']['Adam Epsilon'],
    hyperparameters['Trainer']['Pre-Train']['Adam Weight Decay'],
    hyperparameters['Trainer']['Pre-Train']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)
# Pre-Training
print("\nPre-Training:")
remove_pe_pre_train_loss_coefficients_history, remove_pe_pre_train_train_loss_components_history, remove_pe_pre_train_validate_loss_components_history = \
    measure_time_memory(
        remove_pe_pre_trainer.train, 
        device, 
        # experiment_name="pre_train_experiment"
    )

# Test Pre-Trained Model
print("\nTesting Pre-Trained Model:")
remove_pe_pre_train_test_loss_components, remove_pe_pre_train_test_loss_records = measure_time_memory(remove_pe_pre_trainer.test, device)
print(f"Pre-Train Test Loss Components: {remove_pe_pre_train_test_loss_components}")

warnings.filterwarnings("ignore", category=UserWarning)  
remove_pe_fine_tuner = Trainer(
    remove_pe_model_pre_train,
    fine_tune_data_loader_train,
    fine_tune_data_loader_validation,
    fine_tune_data_loader_test,
    hyperparameters['Trainer']['Fine-Tune']['Number of Epochs'],
    hyperparameters['Trainer']['Fine-Tune']['Warmup Ratio'],
    hyperparameters['Trainer']['Fine-Tune']['Peak Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Gradient Clipping'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Betas'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Epsilon'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Weight Decay'],
    hyperparameters['Trainer']['Fine-Tune']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)

# Fine-Tuning
print("\nFine-Tuning:")
remove_pe_fine_tune_loss_coefficients_history, remove_pe_fine_tune_train_loss_components_history, remove_pe_fine_tune_validate_loss_components_history = measure_time_memory(
    remove_pe_fine_tuner.train, 
    device, 
    # experiment_name="fine_tune_experiment"
)

# Test Fine-Tuned Model
print("\nTesting Fine-Tuned Model:")
remove_pe_fine_tune_test_loss_components, remove_pe_fine_tune_test_loss_records = measure_time_memory(remove_pe_fine_tuner.test, device)
print(f"Fine-Tune Test Loss Components: {remove_pe_fine_tune_test_loss_components}")

Using device: cpu

Pre-Training:
Time: 82.45 seconds
Max Memory: 0.40 MB

Testing Pre-Trained Model:
Time: 0.48 seconds
Max Memory: 0.26 MB
Pre-Train Test Loss Components: tensor([nan, nan, nan])

Fine-Tuning:
Time: 39.96 seconds
Max Memory: 0.39 MB

Testing Fine-Tuned Model:
Time: 0.47 seconds
Max Memory: 0.26 MB
Fine-Tune Test Loss Components: tensor([nan, nan, nan])


# Removing the External Attention

In [12]:
torch.manual_seed(RANDOM_STATE)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
print(f"Using device: {device}")
remove_market_model_pre_train = IvySPT(
    hyperparameters['Surface Embedding']['Embedding Dimension'], 
    hyperparameters['Surface Encoding']['Number of Blocks'],
    hyperparameters['Surface Encoding']['Number of Heads'], 
    hyperparameters['Surface Encoding']['FFN Hidden Dimension'],
    hyperparameters['Surface Encoding']['Attention Dropout'], 
    hyperparameters['Surface Encoding']['Gate Dropout'],
    hyperparameters['Surface Encoding']['FFN Dropout'],
    hyperparameters['Surface Encoding']['External Feature Dimension'],
    hyperparameters['Surface Encoding']['Weight Initializer Std.'],
    hyperparameters['Surface Encoding']['Linear Bias Initializer'],
    hyperparameters['Surface Encoding']['Gate Bias Inititalizer'],
    remove_external_attention=True,
)
warnings.filterwarnings("ignore", category=UserWarning)  
remove_market_pre_trainer = Trainer(
    remove_market_model_pre_train,
    pre_train_data_loader_train,
    pre_train_data_loader_validation,
    pre_train_data_loader_test,
    hyperparameters['Trainer']['Pre-Train']['Number of Epochs'],
    hyperparameters['Trainer']['Pre-Train']['Warmup Ratio'],
    hyperparameters['Trainer']['Pre-Train']['Peak Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Gradient Clipping'],
    hyperparameters['Trainer']['Pre-Train']['Adam Betas'],
    hyperparameters['Trainer']['Pre-Train']['Adam Epsilon'],
    hyperparameters['Trainer']['Pre-Train']['Adam Weight Decay'],
    hyperparameters['Trainer']['Pre-Train']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)
# Pre-Training
print("\nPre-Training:")
remove_market_pre_train_loss_coefficients_history, remove_market_pre_train_train_loss_components_history, remove_market_pre_train_validate_loss_components_history = \
    measure_time_memory(
        remove_market_pre_trainer.train, 
        device, 
        # experiment_name="pre_train_experiment"
    )

# Test Pre-Trained Model
print("\nTesting Pre-Trained Model:")
remove_market_pre_train_test_loss_components, remove_market_pre_train_test_loss_records = measure_time_memory(remove_market_pre_trainer.test, device)
print(f"Pre-Train Test Loss Components: {remove_market_pre_train_test_loss_components}")

warnings.filterwarnings("ignore", category=UserWarning)  
remove_market_fine_tuner = Trainer(
    remove_market_model_pre_train,
    fine_tune_data_loader_train,
    fine_tune_data_loader_validation,
    fine_tune_data_loader_test,
    hyperparameters['Trainer']['Fine-Tune']['Number of Epochs'],
    hyperparameters['Trainer']['Fine-Tune']['Warmup Ratio'],
    hyperparameters['Trainer']['Fine-Tune']['Peak Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Gradient Clipping'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Betas'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Epsilon'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Weight Decay'],
    hyperparameters['Trainer']['Fine-Tune']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)

# Fine-Tuning
print("\nFine-Tuning:")
remove_market_fine_tune_loss_coefficients_history, remove_market_fine_tune_train_loss_components_history, remove_market_fine_tune_validate_loss_components_history = measure_time_memory(
    remove_market_fine_tuner.train, 
    device, 
    # experiment_name="fine_tune_experiment"
)

# Test Fine-Tuned Model
print("\nTesting Fine-Tuned Model:")
remove_market_fine_tune_test_loss_components, remove_market_fine_tune_test_loss_records = measure_time_memory(remove_market_fine_tuner.test, device)
print(f"Fine-Tune Test Loss Components: {remove_market_fine_tune_test_loss_components}")

Using device: cpu

Pre-Training:


KeyboardInterrupt: 

# Removing the Gated Fusion

In [None]:
torch.manual_seed(RANDOM_STATE)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
print(f"Using device: {device}")
remove_gate_model_pre_train = IvySPT(
    hyperparameters['Surface Embedding']['Embedding Dimension'], 
    hyperparameters['Surface Encoding']['Number of Blocks'],
    hyperparameters['Surface Encoding']['Number of Heads'], 
    hyperparameters['Surface Encoding']['FFN Hidden Dimension'],
    hyperparameters['Surface Encoding']['Attention Dropout'], 
    hyperparameters['Surface Encoding']['Gate Dropout'],
    hyperparameters['Surface Encoding']['FFN Dropout'],
    hyperparameters['Surface Encoding']['External Feature Dimension'],
    hyperparameters['Surface Encoding']['Weight Initializer Std.'],
    hyperparameters['Surface Encoding']['Linear Bias Initializer'],
    hyperparameters['Surface Encoding']['Gate Bias Inititalizer'],
    remove_gate=True
)
warnings.filterwarnings("ignore", category=UserWarning)  
remove_gate_pre_trainer = Trainer(
    remove_gate_model_pre_train,
    pre_train_data_loader_train,
    pre_train_data_loader_validation,
    pre_train_data_loader_test,
    hyperparameters['Trainer']['Pre-Train']['Number of Epochs'],
    hyperparameters['Trainer']['Pre-Train']['Warmup Ratio'],
    hyperparameters['Trainer']['Pre-Train']['Peak Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Gradient Clipping'],
    hyperparameters['Trainer']['Pre-Train']['Adam Betas'],
    hyperparameters['Trainer']['Pre-Train']['Adam Epsilon'],
    hyperparameters['Trainer']['Pre-Train']['Adam Weight Decay'],
    hyperparameters['Trainer']['Pre-Train']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)
# Pre-Training
print("\nPre-Training:")
remove_gate_pre_train_loss_coefficients_history, remove_gate_pre_train_train_loss_components_history, remove_gate_pre_train_validate_loss_components_history = \
    measure_time_memory(
        remove_gate_pre_trainer.train, 
        device, 
        # experiment_name="pre_train_experiment"
    )

# Test Pre-Trained Model
print("\nTesting Pre-Trained Model:")
remove_gate_pre_train_test_loss_components, remove_gate_pre_train_test_loss_records = measure_time_memory(remove_gate_pre_trainer.test, device)
print(f"Pre-Train Test Loss Components: {remove_gate_pre_train_test_loss_components}")

warnings.filterwarnings("ignore", category=UserWarning)  
remove_gate_fine_tuner = Trainer(
    remove_gate_model_pre_train,
    fine_tune_data_loader_train,
    fine_tune_data_loader_validation,
    fine_tune_data_loader_test,
    hyperparameters['Trainer']['Fine-Tune']['Number of Epochs'],
    hyperparameters['Trainer']['Fine-Tune']['Warmup Ratio'],
    hyperparameters['Trainer']['Fine-Tune']['Peak Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Gradient Clipping'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Betas'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Epsilon'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Weight Decay'],
    hyperparameters['Trainer']['Fine-Tune']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device
)

# Fine-Tuning
print("\nFine-Tuning:")
remove_gate_fine_tune_loss_coefficients_history, remove_gate_fine_tune_train_loss_components_history, remove_gate_fine_tune_validate_loss_components_history = measure_time_memory(
    remove_gate_fine_tuner.train, 
    device, 
    # experiment_name="fine_tune_experiment"
)

# Test Fine-Tuned Model
print("\nTesting Fine-Tuned Model:")
remove_gate_fine_tune_test_loss_components, remove_gate_fine_tune_test_loss_records = measure_time_memory(remove_gate_fine_tuner.test, device)
print(f"Fine-Tune Test Loss Components: {remove_gate_fine_tune_test_loss_components}")

Using device: cpu

Pre-Training:
Time: 79.52 seconds
Max Memory: 0.43 MB

Testing Pre-Trained Model:
Time: 0.48 seconds
Max Memory: 0.26 MB
Pre-Train Test Loss Components: tensor([0.0470, 0.0005, 0.0000])

Fine-Tuning:
Time: 39.64 seconds
Max Memory: 0.40 MB

Testing Fine-Tuned Model:
Time: 0.49 seconds
Max Memory: 0.26 MB
Fine-Tune Test Loss Components: tensor([0.0903, 0.0003, 0.0432])


# Removing the No Arbitrage Conditions Soft Constraints

In [None]:
torch.manual_seed(RANDOM_STATE)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
print(f"Using device: {device}")
remove_arbitrage_model_pre_train = IvySPT(
    hyperparameters['Surface Embedding']['Embedding Dimension'], 
    hyperparameters['Surface Encoding']['Number of Blocks'],
    hyperparameters['Surface Encoding']['Number of Heads'], 
    hyperparameters['Surface Encoding']['FFN Hidden Dimension'],
    hyperparameters['Surface Encoding']['Attention Dropout'], 
    hyperparameters['Surface Encoding']['Gate Dropout'],
    hyperparameters['Surface Encoding']['FFN Dropout'],
    hyperparameters['Surface Encoding']['External Feature Dimension'],
    hyperparameters['Surface Encoding']['Weight Initializer Std.'],
    hyperparameters['Surface Encoding']['Linear Bias Initializer'],
    hyperparameters['Surface Encoding']['Gate Bias Inititalizer']
)
warnings.filterwarnings("ignore", category=UserWarning)  
remove_arbitrage_pre_trainer = Trainer(
    remove_arbitrage_model_pre_train,
    pre_train_data_loader_train,
    pre_train_data_loader_validation,
    pre_train_data_loader_test,
    hyperparameters['Trainer']['Pre-Train']['Number of Epochs'],
    hyperparameters['Trainer']['Pre-Train']['Warmup Ratio'],
    hyperparameters['Trainer']['Pre-Train']['Peak Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Pre-Train']['Gradient Clipping'],
    hyperparameters['Trainer']['Pre-Train']['Adam Betas'],
    hyperparameters['Trainer']['Pre-Train']['Adam Epsilon'],
    hyperparameters['Trainer']['Pre-Train']['Adam Weight Decay'],
    hyperparameters['Trainer']['Pre-Train']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device,
    remove_multi_loss=True
)
# Pre-Training
print("\nPre-Training:")
remove_arbitrage_pre_train_loss_coefficients_history, remove_arbitrage_pre_train_train_loss_components_history, remove_arbitrage_pre_train_validate_loss_components_history = \
    measure_time_memory(
        remove_arbitrage_pre_trainer.train, 
        device, 
        # experiment_name="pre_train_experiment"
    )

# Test Pre-Trained Model
print("\nTesting Pre-Trained Model:")
remove_arbitrage_pre_train_test_loss_components, remove_arbitrage_pre_train_test_loss_records = measure_time_memory(remove_arbitrage_pre_trainer.test, device)
print(f"Pre-Train Test Loss Components: {remove_arbitrage_pre_train_test_loss_components}")

warnings.filterwarnings("ignore", category=UserWarning)  
remove_arbitrage_fine_tuner = Trainer(
    remove_arbitrage_model_pre_train,
    fine_tune_data_loader_train,
    fine_tune_data_loader_validation,
    fine_tune_data_loader_test,
    hyperparameters['Trainer']['Fine-Tune']['Number of Epochs'],
    hyperparameters['Trainer']['Fine-Tune']['Warmup Ratio'],
    hyperparameters['Trainer']['Fine-Tune']['Peak Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Minimal Learning Rate'],
    hyperparameters['Trainer']['Fine-Tune']['Gradient Clipping'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Betas'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Epsilon'],
    hyperparameters['Trainer']['Fine-Tune']['Adam Weight Decay'],
    hyperparameters['Trainer']['Fine-Tune']['Layer-Wise Decay'],
    hyperparameters['Adaptive Loss Weights']['Asymmetry'],
    device,
    remove_multi_loss=True
)

# Fine-Tuning
print("\nFine-Tuning:")
remove_arbitrage_fine_tune_loss_coefficients_history, remove_arbitrage_fine_tune_train_loss_components_history, remove_arbitrage_fine_tune_validate_loss_components_history = measure_time_memory(
    remove_arbitrage_fine_tuner.train, 
    device, 
    # experiment_name="fine_tune_experiment"
)

# Test Fine-Tuned Model
print("\nTesting Fine-Tuned Model:")
remove_arbitrage_fine_tune_test_loss_components, remove_arbitrage_fine_tune_test_loss_records = measure_time_memory(remove_arbitrage_fine_tuner.test, device)
print(f"Fine-Tune Test Loss Components: {remove_arbitrage_fine_tune_test_loss_components}")

Using device: cpu

Pre-Training:
Time: 15.39 seconds
Max Memory: 0.46 MB

Testing Pre-Trained Model:
Time: 0.49 seconds
Max Memory: 0.27 MB
Pre-Train Test Loss Components: tensor([0.0033, 0.0045, 0.0000])

Fine-Tuning:
Time: 7.60 seconds
Max Memory: 0.43 MB

Testing Fine-Tuned Model:
Time: 0.49 seconds
Max Memory: 0.27 MB
Fine-Tune Test Loss Components: tensor([0.0066, 0.0002, 0.0000])
