To check whether generated vol cubes are consistent with historical volatility cubes, we do the following procedure:

1) Load the trained VAE model

2) Take 1 volatility cube sample from test dataset

3) Generate N volatility cubes

4) Among the N generated volatility cubes, we find the one that is closest to the test volatility cube from item 2. Closeness is measured by the mean squared error over all data points in the vol cube.

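5) We repeat items 2 and 4 for every vol cube in the test dataset (reusing the same model and the same N generated cubes) and calculate the average MSE across all vol cubes from the test dataset; the reported figure is the square root of that average, in bp.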

We do steps 1-5 for both the random split and the temporal split.

# Random split

Download, normalize and split vol cube data into train/test datasets

In [1]:
import sys

# Put the project's main folder on the import path so the `src` package resolves.
sys.path.append('../..')

from src.data.vol.get_vol_cube_tenors_strikes_dates import get_vol_cube_tenors_strikes_dates
from src.data.vol.normalizer import Normalizer
from src.utils.get_train_test_datasets import get_train_test_datasets

# Fetch the vol cube data together with its tenors, strikes and dates.
data, opt_tenors, swap_tenors, strikes, dates = get_vol_cube_tenors_strikes_dates()

# Normalize the whole data set with a single Normalizer instance; the same
# instance is reused below to undo the normalization.
normalizer = Normalizer()
data_norm = normalizer.normalize(data)

# Random 80/20 train/test split with a fixed seed.
dataset_split_type = 'random_split'
data_norm_train, dates_train, data_norm_test, dates_test = get_train_test_datasets(
    data_norm,
    dates,
    seed=0,
    train_ratio=0.8,
    type=dataset_split_type,
)

# Undo the normalization on each split.
data_train = normalizer.denormalize(data_norm_train)
data_test = normalizer.denormalize(data_norm_test)

Load the saved model and its history from the file

In [2]:
from src.utils.load_model_and_history import load_model_and_history

# Identifier of the saved run to restore (the name suggests a model trained
# on the random split).
NAME = 'vae_van_leaky_randomsplit_3_200_100_50_25_3000ep_bat16_1e-5'

# Restore the model object together with its recorded training history.
vae, history = load_model_and_history(NAME, data_type='vol')




  saveable.load_own_variables(weights_store.get(inner_path))


Check whether generated vol cubes are consistent with historical volatility cubes

In [3]:
import numpy as np
from src.visualization.vol.vol_cube_grids import vol_cube_grids
strikes = ['ATM-100bp', 'ATM-50bp', 'ATM', 'ATM+50bp', 'ATM+100bp']  # graphs only for these strikes

# Number of data points in a single vol cube (used to turn a summed squared
# error into a per-point mean).
points_num = len(data_test[0].reshape(-1))

# Generate N vol cubes by decoding N latent vectors drawn from a standard
# normal distribution (zero mean, identity covariance).
N = 10_000
latent_space_dim = vae.latent_space_dim
mean = np.zeros(latent_space_dim)
cov = np.eye(latent_space_dim)
np.random.seed(0)  # fixed seed so the same cubes are generated on every run
z_initial = np.random.multivariate_normal(mean, cov, N)
gen_vol_cubes = np.asarray(normalizer.denormalize(vae.decoder.predict(z_initial, verbose=0)))

# For every test cube, find the generated cube with the smallest summed
# squared error. The search over the N generated cubes is done with one
# vectorized NumPy reduction per test date instead of a Python-level loop.
sq_errs_opt_all = 0
max_err = 0
cube_axes = tuple(range(1, gen_vol_cubes.ndim))  # every axis except the sample axis

for date_idx, _ in enumerate(dates_test):
    # Broadcasting subtracts the single test cube from all N generated cubes.
    diffs = np.asarray(data_test[date_idx]) - gen_vol_cubes
    sq_errs = (diffs**2).sum(axis=cube_axes)

    # argmin returns the first minimum, which matches a linear scan with a
    # strict `<` comparison (assuming the cubes contain no NaNs).
    best_i = int(np.argmin(sq_errs))
    sq_errs_opt_all += sq_errs[best_i]

    # Largest absolute point-wise deviation of the best-matching cube.
    max_err_curr = np.abs(diffs[best_i]).max()
    max_err = max(max_err_curr, max_err)

# NOTE: the first printed value is labelled 'mse' but is the square root of
# the mean squared error (i.e. an RMSE), which is why it is reported in bp.
print('mse over all dates', round((sq_errs_opt_all / len(dates_test) / points_num)**0.5, 2),
      'bp, while max_err over all dates', round(max_err,2), 'bp')

mse over all dates 1.35 bp, while max_err over all dates 40.32 bp


# Temporal split

In [4]:
import sys

# Put the project's main folder on the import path so the `src` package resolves.
sys.path.append('../..')

from src.data.vol.get_vol_cube_tenors_strikes_dates import get_vol_cube_tenors_strikes_dates
from src.data.vol.normalizer import Normalizer
from src.utils.get_train_test_datasets import get_train_test_datasets

# Fetch the vol cube data together with its tenors, strikes and dates.
data, uniq_opt_tenors, uniq_swap_tenors, uniq_strikes, dates = get_vol_cube_tenors_strikes_dates()

# Normalize the whole data set with a single Normalizer instance; the same
# instance is reused below to undo the normalization.
normalizer = Normalizer()
data_norm = normalizer.normalize(data)

# Temporal train/test split; the splitter's own defaults are used for
# everything except the split type.
dataset_split_type = 'temporal_split'
data_norm_train, dates_train, data_norm_test, dates_test = get_train_test_datasets(
    data_norm,
    dates,
    type=dataset_split_type,
)

# Undo the normalization on each split.
data_train = normalizer.denormalize(data_norm_train)
data_test = normalizer.denormalize(data_norm_test)

Load the saved model and its history from the file

In [5]:
from src.utils.load_model_and_history import load_model_and_history

# Identifier of the saved run to restore for the temporal-split experiment.
NAME = 'vae_van_leaky_3_128_48_3000ep_bat16_3e-4'

# Restore the model object together with its recorded training history.
vae, history = load_model_and_history(NAME, data_type='vol')

  saveable.load_own_variables(weights_store.get(inner_path))


Check whether generated vol cubes are consistent with historical volatility cubes

In [6]:
import numpy as np
from src.visualization.vol.vol_cube_grids import vol_cube_grids
strikes = ['ATM-100bp', 'ATM-50bp', 'ATM', 'ATM+50bp', 'ATM+100bp']  # graphs only for these strikes

# Number of data points in a single vol cube (used to turn a summed squared
# error into a per-point mean).
points_num = len(data_test[0].reshape(-1))

# Generate N vol cubes by decoding N latent vectors drawn from a standard
# normal distribution (zero mean, identity covariance).
N = 10_000
latent_space_dim = vae.latent_space_dim
mean = np.zeros(latent_space_dim)
cov = np.eye(latent_space_dim)
np.random.seed(0)  # fixed seed so the same cubes are generated on every run
z_initial = np.random.multivariate_normal(mean, cov, N)
gen_vol_cubes = np.asarray(normalizer.denormalize(vae.decoder.predict(z_initial, verbose=0)))

# For every test cube, find the generated cube with the smallest summed
# squared error. The search over the N generated cubes is done with one
# vectorized NumPy reduction per test date instead of a Python-level loop.
sq_errs_opt_all = 0
max_err = 0
cube_axes = tuple(range(1, gen_vol_cubes.ndim))  # every axis except the sample axis

for date_idx, _ in enumerate(dates_test):
    # Broadcasting subtracts the single test cube from all N generated cubes.
    diffs = np.asarray(data_test[date_idx]) - gen_vol_cubes
    sq_errs = (diffs**2).sum(axis=cube_axes)

    # argmin returns the first minimum, which matches a linear scan with a
    # strict `<` comparison (assuming the cubes contain no NaNs).
    best_i = int(np.argmin(sq_errs))
    sq_errs_opt_all += sq_errs[best_i]

    # Largest absolute point-wise deviation of the best-matching cube.
    max_err_curr = np.abs(diffs[best_i]).max()
    max_err = max(max_err_curr, max_err)

# NOTE: the first printed value is labelled 'mse' but is the square root of
# the mean squared error (i.e. an RMSE), which is why it is reported in bp.
print('mse over all dates', round((sq_errs_opt_all / len(dates_test) / points_num)**0.5, 2),
      'bp, while max_err over all dates', round(max_err,2), 'bp')

mse over all dates 5.36 bp, while max_err over all dates 63.11 bp
