In [1]:
import numpy as np
import pandas as pd
import random
import torch

In [2]:
# Set the random seed for reproducibility
# One shared seed value is applied to the three random number sources used by
# this notebook: PyTorch, NumPy's legacy global generator, and Python's
# built-in `random` module.
RANDOM_STATE = 0
# N_JOBS is not referenced in the visible part of the notebook; presumably a
# worker / parallelism count for a later cell — TODO confirm.
N_JOBS = 8
torch.manual_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)

In [3]:
# Nested configuration dictionary, grouped by pipeline stage.
# In the visible part of the notebook only
# HYPERPARAMETERS['Input Preprocessing']['Batch Size'] is read (it is passed to
# the DataLoader as `batch_size`). The remaining entries are presumably
# consumed by model components defined further down — TODO confirm each key
# against its consumer before renaming or removing it.
HYPERPARAMETERS = {
    'Input Preprocessing' : {
        # Presumably the candidate masking proportions handed to the dataset — TODO confirm.
        'Mask Proportions' : [0.1, 0.2, 0.4, 0.8],
        # Number of surfaces per DataLoader batch.
        'Batch Size' : 4
    },
    'Input Embedding' : {
        'Surface Embedding' : {
            'Grid Dimension' : 3,
            'Channels Dimension' : 8,
        },
        'Pre-Encoder' : {
            'Branch Channels Dimension' : 4,
            'Number of Blocks' : 2,
        }
    },
    'Surface Encoding' : {
        'Encoder' : {
            'Number of Heads' : 4,
            'Hidden Dimension' : 16,
            'Dropout' : 0.1,
            'Number of Blocks' : 2,
            # NOTE(review): 3 matches the number of market features carried per
            # surface (return, volatility, treasury rate) — confirm that is the
            # intended link.
            'External Feature Dimension' : 3,
        }
    },
    'Query Embedding' : {
        'Pre-Decoder' : {
            'Hidden Dimension' : 16,
            'Dropout' : 0.1,
            'Number of Blocks' : 2,
        }
    },
    'Surface Decoding' : {
        'Decoder' : {
            'Number of Heads' : 4,
            'Hidden Dimension' : 16,
            'Dropout' : 0.1,
            'Number of Blocks' : 2,
        }
    },
    # Presumably weights of the butterfly / calendar no-arbitrage penalty terms
    # in the loss — TODO confirm.
    'No-Arbitrage' : {
        'Butterfly' : 1,
        'Calendar' : 1,
    }
}

## Dataset

In [4]:
# Load the options data from a CSV in the working directory. The first two
# columns become a two-level row index (index_col=[0, 1]); downstream code
# groups on index levels named 'Datetime' and 'Symbol', so the CSV header must
# provide those names. parse_dates=True asks pandas to parse the index as
# dates, using the ISO 8601 format given by date_format.
aapl_googl_data = pd.read_csv('volatility_surface_AAPL_GOOGL_2013_01_2013_06.csv', parse_dates=True, index_col=[0, 1], date_format="ISO8601")
# Bare last expression: display the loaded frame as the cell output.
aapl_googl_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Log Moneyness,Time to Maturity,Implied Volatility,Market Return,Market Volatility,Treasury Rate
Datetime,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-01-02,AAPL,-0.316688,0.007937,0.3726,0.025086,14.680000,0.055
2013-01-02,AAPL,-0.316688,0.007937,0.6095,0.025086,14.680000,0.055
2013-01-02,AAPL,-0.304266,0.007937,0.3726,0.025086,14.680000,0.055
2013-01-02,AAPL,-0.304266,0.007937,0.6095,0.025086,14.680000,0.055
2013-01-02,AAPL,-0.291996,0.007937,0.3726,0.025086,14.680000,0.055
...,...,...,...,...,...,...,...
2013-06-28,GOOGL,0.427518,2.253968,0.2430,-0.004299,16.860001,0.030
2013-06-28,GOOGL,0.434898,2.253968,0.2383,-0.004299,16.860001,0.030
2013-06-28,GOOGL,0.434898,2.253968,0.2426,-0.004299,16.860001,0.030
2013-06-28,GOOGL,0.442224,2.253968,0.2402,-0.004299,16.860001,0.030


In [7]:
def implied_volatility_surfaces(options_market_data):
    """Split an options table into one record per (Datetime, Symbol) pair.

    Parameters
    ----------
    options_market_data : pandas.DataFrame
        Frame whose index has levels named 'Datetime' and 'Symbol' and whose
        columns include 'Log Moneyness', 'Time to Maturity',
        'Implied Volatility', 'Market Return', 'Market Volatility' and
        'Treasury Rate'.

    Returns
    -------
    list of dict
        One dictionary per group, in groupby order, with keys 'Datetime',
        'Symbol', 'Market Features' (the first row's value of each market
        column) and 'Surface' (the full per-point arrays of the group).
    """
    market_columns = ('Market Return', 'Market Volatility', 'Treasury Rate')
    point_columns = ('Log Moneyness', 'Time to Maturity', 'Implied Volatility')

    per_surface = options_market_data.groupby(level=['Datetime', 'Symbol'])

    return [
        {
            'Datetime': date,
            'Symbol': symbol,
            # Market features are read from the first row of the group only.
            'Market Features': {
                column: group[column].values[0] for column in market_columns
            },
            # Per-point quantities keep every row of the group.
            'Surface': {
                column: group[column].values for column in point_columns
            },
        }
        for (date, symbol), group in per_surface
    ]

# Build the list of per-(Datetime, Symbol) surface dictionaries from the loaded frame.
surfaces = implied_volatility_surfaces(aapl_googl_data)
# Display the first surface record as the cell output.
surfaces[0]

{'Datetime': Timestamp('2013-01-02 00:00:00'),
 'Symbol': 'AAPL',
 'Market Features': {'Market Return': 0.0250861159586972,
  'Market Volatility': 14.68000030517578,
  'Treasury Rate': 0.0549999997019767},
 'Surface': {'Log Moneyness': array([-0.31668849, -0.31668849, -0.30426597, ...,  0.63882295,
          0.6483924 ,  0.6483924 ]),
  'Time to Maturity': array([0.00793651, 0.00793651, 0.00793651, ..., 2.95634921, 2.95634921,
         2.95634921]),
  'Implied Volatility': array([0.3726, 0.6095, 0.3726, ..., 0.3387, 0.3342, 0.3389])}}

In [16]:
import torch
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
import numpy as np

class IVSurfaceDataset(Dataset):
    """Dataset of implied-volatility surfaces with cluster-based point masking.

    Each element of ``data`` is a surface dictionary with the keys 'Datetime',
    'Symbol', 'Market Features' and 'Surface' (as built by
    ``implied_volatility_surfaces``). On access, the surface points are
    standardized and clustered with KMeans into ``ceil(1 / proportion)``
    clusters, one cluster is drawn at random, and
    ``ceil(len(cluster) * proportion)`` of its points become 'Query Points'.
    The remaining points of that same cluster become 'Input Surface'.

    NOTE(review): points outside the drawn cluster are returned in neither
    'Input Surface' nor 'Query Points'. Confirm this is intended rather than
    "every non-masked point of the surface is input".

    NOTE(review): the random generator is re-created from ``random_state`` on
    every access, so a given index always yields the same split. KMeans is also
    refitted on every access.

    Parameters
    ----------
    data : sequence of dict
        Surface dictionaries.
    proportion : float
        Masking proportion; must satisfy ``0 < proportion <= 1``.
    random_state : int, default 0
        Seed for both KMeans and the masking generator.

    Raises
    ------
    ValueError
        If ``proportion`` is outside ``(0, 1]``.
    """

    def __init__(
        self, 
        data, 
        proportion, 
        random_state=0
    ):
        # Fail at construction time: an invalid proportion would otherwise only
        # surface later, inside __getitem__, as a ZeroDivisionError or as an
        # error raised from KMeans / rng.choice.
        if not 0 < proportion <= 1:
            raise ValueError(
                f"proportion must be in the interval (0, 1], got {proportion!r}"
            )
        self.data = data
        self.proportion = proportion
        self.random_state = random_state

    def __len__(self):
        """Return the number of surfaces."""
        return len(self.data)

    @staticmethod
    def _as_float_tensor(values):
        """Convert a scalar or array to a float32 tensor."""
        return torch.tensor(values, dtype=torch.float32)

    def _split_indices(self, points_coordinates):
        """Return ``(masked_indices, unmasked_indices)`` for one surface."""
        n_points = len(points_coordinates)
        if n_points == 0:
            # Nothing to cluster: both index sets are empty.
            no_indices = np.empty(0, dtype=np.intp)
            return no_indices, no_indices.copy()

        # KMeans cannot fit more clusters than samples, so small surfaces use
        # at most one cluster per point.
        n_clusters = min(int(np.ceil(1 / self.proportion)), n_points)
        pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('kmeans', KMeans(n_clusters=n_clusters, random_state=self.random_state, n_init='auto'))
        ])
        labels = pipeline.fit_predict(points_coordinates)

        rng = np.random.default_rng(self.random_state)
        cluster_indices = np.where(labels == rng.integers(n_clusters))[0]
        num_to_mask = int(np.ceil(len(cluster_indices) * self.proportion))
        masked_indices = rng.choice(cluster_indices, size=num_to_mask, replace=False)

        # Only the rest of the drawn cluster is kept as input (see class docstring).
        unmasked_indices = np.setdiff1d(cluster_indices, masked_indices)
        return masked_indices, unmasked_indices

    def __getitem__(self, idx):
        """Return the masked / unmasked split of surface ``idx`` as tensors.

        The result has keys 'Datetime', 'Symbol', 'Market Features' (0-dim
        float32 tensors), 'Input Surface' and 'Query Points' (1-D float32
        tensors for 'Log Moneyness', 'Time to Maturity', 'Implied Volatility').
        """
        surface_data = self.data[idx]
        surface = surface_data['Surface']
        market = surface_data['Market Features']

        # Column 0 is log moneyness, column 1 is time to maturity.
        points_coordinates = np.stack([
            surface['Log Moneyness'],
            surface['Time to Maturity']
        ], axis=1)
        points_volatilities = surface['Implied Volatility']

        masked_indices, unmasked_indices = self._split_indices(points_coordinates)

        def select(indices):
            return {
                'Log Moneyness': self._as_float_tensor(points_coordinates[indices, 0]),
                'Time to Maturity': self._as_float_tensor(points_coordinates[indices, 1]),
                'Implied Volatility': self._as_float_tensor(points_volatilities[indices]),
            }

        data_item = {
            'Datetime': surface_data['Datetime'],
            'Symbol': surface_data['Symbol'],
            'Market Features': {
                'Market Return': self._as_float_tensor(market['Market Return']),
                'Market Volatility': self._as_float_tensor(market['Market Volatility']),
                'Treasury Rate': self._as_float_tensor(market['Treasury Rate']),
            },
            'Input Surface': select(unmasked_indices),
            'Query Points': select(masked_indices),
        }

        return data_item

    @staticmethod
    def collate_fn(batch):
        """Collate dataset items into one batch dictionary.

        Market features are stacked with ``default_collate``. The per-surface
        point tensors stay in Python lists, one tensor per item, because their
        lengths differ between surfaces. Query-point 'Log Moneyness' and
        'Time to Maturity' tensors are detached copies with
        ``requires_grad=True``.
        """
        batched_data = {
            'Datetime': [item['Datetime'] for item in batch],
            'Symbol': [item['Symbol'] for item in batch],
            'Market Features': {
                'Market Return': default_collate([item['Market Features']['Market Return'] for item in batch]),
                'Market Volatility': default_collate([item['Market Features']['Market Volatility'] for item in batch]),
                'Treasury Rate': default_collate([item['Market Features']['Treasury Rate'] for item in batch]),
            },
            'Input Surface': {
                'Log Moneyness': [item['Input Surface']['Log Moneyness'].clone().detach() for item in batch],
                'Time to Maturity': [item['Input Surface']['Time to Maturity'].clone().detach() for item in batch],
                'Implied Volatility': [item['Input Surface']['Implied Volatility'].clone().detach() for item in batch],
            },
            'Query Points': {
                'Log Moneyness': [item['Query Points']['Log Moneyness'].clone().detach().requires_grad_(True) for item in batch],
                'Time to Maturity': [item['Query Points']['Time to Maturity'].clone().detach().requires_grad_(True) for item in batch],
                'Implied Volatility': [item['Query Points']['Implied Volatility'].clone().detach() for item in batch],
            }
        }

        return batched_data


# Assuming surfaces is the output from the implied_volatility_surfaces function
proportion = 0.2  # example proportion
dataset = IVSurfaceDataset(surfaces, proportion)
# The custom collate_fn is required because each surface has a different number
# of points: it keeps per-surface tensors in lists instead of stacking them.
# shuffle=True randomizes the order in which surfaces are drawn.
data_loader = DataLoader(
    dataset, 
    batch_size=HYPERPARAMETERS['Input Preprocessing']['Batch Size'], 
    shuffle=True, 
    num_workers=0, 
    collate_fn=IVSurfaceDataset.collate_fn
)

# Fetch one batch from the DataLoader
batch = next(iter(data_loader))
# Display the batch as the cell output; `batch` is reused by later cells.
batch

{'Datetime': [Timestamp('2013-05-06 00:00:00'),
  Timestamp('2013-04-04 00:00:00'),
  Timestamp('2013-05-22 00:00:00'),
  Timestamp('2013-06-19 00:00:00')],
 'Symbol': ['AAPL', 'AAPL', 'GOOGL', 'GOOGL'],
 'Market Features': {'Market Return': tensor([ 0.0019,  0.0040, -0.0083, -0.0139]),
  'Market Volatility': tensor([12.6600, 13.8900, 13.8200, 16.6400]),
  'Treasury Rate': tensor([0.0300, 0.0650, 0.0350, 0.0400])},
 'Input Surface': {'Log Moneyness': [tensor([-0.4290, -0.3962, -0.3962, -0.3802, -0.3644, -0.3489, -0.3337, -0.3186,
           -0.3038, -0.3038, -0.2892, -0.2892, -0.2748, -0.2748, -0.2607, -0.2607,
           -0.2467, -0.2467, -0.2329, -0.2329, -0.2193, -0.2058, -0.2058, -0.7167,
           -0.7167, -0.6947, -0.6732, -0.6732, -0.6521, -0.6521, -0.6315, -0.6315,
           -0.6113, -0.6113, -0.5915, -0.5915, -0.5721, -0.5721, -0.5530, -0.5530,
           -0.5343, -0.5160, -0.5160, -0.4980, -0.4980, -0.4803, -0.4629, -0.4629,
           -0.4458, -0.4458, -0.4290, -0.4290, -0

## Surface Embedding

### Components

In [20]:
import torch
import torch.nn as nn
from torch.utils.data._utils.collate import default_collate

class SurfaceBatchNorm(nn.Module):
    """Batch-normalize the coordinates and market features of a collated batch.

    Five independent ``nn.BatchNorm1d`` layers are held, one each for log
    moneyness, time to maturity, market return, market volatility and treasury
    rate. Implied volatilities are passed through untouched.

    Parameters
    ----------
    num_features : int, default 1
        Feature count handed to every ``BatchNorm1d``. ``forward`` feeds each
        quantity as a single column, so the default of 1 is what it expects.
    momentum : float, default 0.1
        Running-statistics momentum handed to every ``BatchNorm1d``.
    """

    def __init__(
        self,
        num_features=1,
        momentum=0.1
    ):
        super().__init__()
        # Attribute names and creation order are kept so the module's
        # state_dict layout stays the same.
        self.log_moneyness_bn = nn.BatchNorm1d(num_features, momentum=momentum)
        self.time_to_maturity_bn = nn.BatchNorm1d(num_features, momentum=momentum)
        self.market_return_bn = nn.BatchNorm1d(num_features, momentum=momentum)
        self.market_volatility_bn = nn.BatchNorm1d(num_features, momentum=momentum)
        self.treasury_rate_bn = nn.BatchNorm1d(num_features, momentum=momentum)

    @staticmethod
    def _normalize_column(layer, values):
        """Run a 1-D tensor through ``layer`` as a single-feature column."""
        return layer(values.unsqueeze(1)).squeeze(1)

    def forward(self, batch):
        """Return a new batch dictionary with normalized coordinates and market features.

        For each coordinate, all input-surface points of the batch followed by
        all query points are pooled into one vector, normalized together, and
        split back into per-surface tensors. 'Datetime', 'Symbol' and both
        'Implied Volatility' entries are forwarded from ``batch`` unchanged.
        """
        surface_in = batch['Input Surface']
        queries = batch['Query Points']

        # Pool every point of the batch: input-surface points first, then query points.
        pooled = {
            key: torch.cat([torch.cat(list(surface_in[key])), *queries[key]])
            for key in ('Log Moneyness', 'Time to Maturity')
        }

        norm_log_moneyness = self._normalize_column(
            self.log_moneyness_bn, pooled['Log Moneyness']
        )
        norm_time_to_maturity = self._normalize_column(
            self.time_to_maturity_bn, pooled['Time to Maturity']
        )

        # Per-surface point counts, needed to undo the pooling. Both
        # coordinates are split using the 'Log Moneyness' lengths.
        input_lengths = [len(points) for points in surface_in['Log Moneyness']]
        query_lengths = [len(points) for points in queries['Log Moneyness']]
        boundary = sum(input_lengths)

        market = batch['Market Features']
        normalized_market = {
            'Market Return': self._normalize_column(
                self.market_return_bn, market['Market Return']
            ),
            'Market Volatility': self._normalize_column(
                self.market_volatility_bn, market['Market Volatility']
            ),
            'Treasury Rate': self._normalize_column(
                self.treasury_rate_bn, market['Treasury Rate']
            ),
        }

        output = {
            'Datetime': batch['Datetime'],
            'Symbol': batch['Symbol'],
            'Market Features': normalized_market,
            'Input Surface': {
                'Log Moneyness': list(
                    torch.split(norm_log_moneyness[:boundary], input_lengths)
                ),
                'Time to Maturity': list(
                    torch.split(norm_time_to_maturity[:boundary], input_lengths)
                ),
                'Implied Volatility': surface_in['Implied Volatility'],
            },
            'Query Points': {
                'Log Moneyness': list(
                    torch.split(norm_log_moneyness[boundary:], query_lengths)
                ),
                'Time to Maturity': list(
                    torch.split(norm_time_to_maturity[boundary:], query_lengths)
                ),
                'Implied Volatility': queries['Implied Volatility'],
            },
        }

        # Query coordinates must carry gradients; implied volatilities are left alone.
        for coordinate in ('Log Moneyness', 'Time to Maturity'):
            for point_tensor in output['Query Points'][coordinate]:
                point_tensor.requires_grad_()

        return output

# Usage
surfacebatchnorm = SurfaceBatchNorm()
processed_batch = surfacebatchnorm(batch)
processed_batch

{'Datetime': [Timestamp('2013-05-06 00:00:00'),
  Timestamp('2013-04-04 00:00:00'),
  Timestamp('2013-05-22 00:00:00'),
  Timestamp('2013-06-19 00:00:00')],
 'Symbol': ['AAPL', 'AAPL', 'GOOGL', 'GOOGL'],
 'Market Features': {'Market Return': tensor([ 0.7464,  1.0127, -0.5278, -1.2314], grad_fn=<SqueezeBackward1>),
  'Market Volatility': tensor([-1.0889, -0.2479, -0.2957,  1.6326], grad_fn=<SqueezeBackward1>),
  'Treasury Rate': tensor([-0.9039,  1.6270, -0.5423, -0.1808], grad_fn=<SqueezeBackward1>)},
 'Input Surface': {'Log Moneyness': [tensor([-1.1584, -1.0310, -1.0310, -0.9689, -0.9077, -0.8475, -0.7882, -0.7298,
           -0.6722, -0.6722, -0.6155, -0.6155, -0.5596, -0.5596, -0.5045, -0.5045,
           -0.4502, -0.4502, -0.3966, -0.3966, -0.3438, -0.2916, -0.2916, -2.2758,
           -2.2758, -2.1904, -2.1069, -2.1069, -2.0251, -2.0251, -1.9450, -1.9450,
           -1.8666, -1.8666, -1.7896, -1.7896, -1.7142, -1.7142, -1.6402, -1.6402,
           -1.5676, -1.4964, -1.4964, -1.426

In [22]:
import torch
import torch.nn as nn

class EllipticalRBFKernel(nn.Module):
    """RBF kernel with a learnable diagonal (per-dimension) scaling.

    The kernel value is ``exp(-q / (2 * bandwidth**2))`` where
    ``q = (distances @ diag(s) @ distances.t()) / trace(diag(s))`` and
    ``s = exp(log_scale)``.

    Parameters
    ----------
    input_dim : int
        Length of the learnable ``log_scale`` vector.
    bandwidth : float
        Kernel bandwidth; stored as a plain attribute, not a parameter.

    NOTE(review): for a 1-D ``distances`` vector ``q`` is a scalar quadratic
    form. For a 2-D ``(N, D)`` input the matrix product gives an ``(N, N)``
    result that includes cross terms between different rows. Confirm which
    input shape callers rely on.
    """

    def __init__(
        self,
        input_dim,
        bandwidth
    ):
        super().__init__()
        self.bandwidth = bandwidth
        # Stored in log space; zeros mean every scale factor starts at exp(0) = 1.
        self.log_scale = nn.Parameter(torch.zeros(input_dim))

    def forward(self, distances):
        """Return the kernel value(s) for ``distances``."""
        # Diagonal matrix of the (always positive) per-dimension scale factors.
        metric = torch.diag(torch.exp(self.log_scale))

        # Scaled quadratic form, divided by the sum of the scale factors.
        quadratic_form = distances @ metric @ distances.t()
        normalized = quadratic_form / torch.trace(metric)

        return torch.exp(-normalized / (2 * self.bandwidth**2))

