##### Download, normalize and split vol cube data into train/test datasets

In [1]:
import sys

# '../..' is expected to be the main folder of the whole project; it has to be
# on sys.path before any of the `src.*` imports below can be resolved.
sys.path.append('../..')

from src.data.vol.get_vol_cube_tenors_strikes_dates import get_vol_cube_tenors_strikes_dates
from src.data.vol.normalizer import Normalizer
from src.utils.get_train_test_datasets import get_train_test_datasets

# Seed for the train/test split (the original author notes that other seeds
# give the same result).
seed = 0

# Fetch the vol cube data together with the tenor / strike / date values
# returned alongside it.
(data,
 uniq_opt_tenors,
 uniq_swap_tenors,
 uniq_strikes,
 dates) = get_vol_cube_tenors_strikes_dates()

# Normalize the data; the same Normalizer instance is reused for denormalizing.
normalizer = Normalizer()
data_norm = normalizer.normalize(data)

# Split the normalized data (and the matching dates) into train and test parts.
(data_norm_train,
 dates_train,
 data_norm_test,
 dates_test) = get_train_test_datasets(data_norm, dates, seed=seed, train_ratio=0.8)

# Denormalized counterparts of both splits.
data_train = normalizer.denormalize(data_norm_train)
data_test = normalizer.denormalize(data_norm_test)

##### Load the saved model and its history from the file

In [2]:
from src.utils.load_model_and_history import load_model_and_history

# Identifier of the saved model; passed to the loader together with the data type.
NAME = 'vol_cube_vae_van_2latd_400_200_200_100_3000ep_bat16_2e-06_seed0'

# Load the model and its history for the 'vol' data type.
vae, history = load_model_and_history(NAME, data_type='vol')




  saveable.load_own_variables(weights_store.get(inner_path))


### Suppose the ATM vols are missing for the date with the largest vols

##### Create the dataset with missing values

In [3]:
# Settings and imports for the experiment in which one strike is blanked out.
import numpy as np

# MISSED_VALUE is the value that is considered as a missed value.
from references.global_parameters import MISSED_VALUE
from src.visualization.vol.find_z_to_complete_vol_cube import find_z_to_complete_vol_cube

# Index of the strike to blank out: 0 is ATM-100bp, 3 is ATM, 6 is ATM+100bp.
missed_strike = 3

# Strike labels taken into account; the error loop in the next cell only
# scores strikes whose label appears in this list.
strikes = [
    'ATM-100bp',
    'ATM-50bp',
    'ATM',
    'ATM+50bp',
    'ATM+100bp',
]

##### Calculate the RMSE over all test dates when we complete the dataset (takes 6 hours)

In [4]:
# Accumulators over all test dates.
# NOTE: despite its name, `mse` holds the running SUM of squared errors; it is
# divided by `n` and square-rooted at the end, so the reported number is an RMSE.
mse = 0
n = 0

# True for every strike whose label is listed in `strikes`; only those strikes
# contribute to the error. Loop-invariant, so it is computed once.
scored_strike = np.array([stk in strikes for stk in uniq_strikes], dtype=bool)

for date_idx in range(len(dates_test)):
    print(f'{date_idx} out of {len(dates_test)}')

    # Blank out the chosen strike for the current date only. Copying just this
    # date's cube avoids duplicating the whole test set on every iteration.
    cube_true = data_test[date_idx]
    cube_missed = cube_true.copy()
    cube_missed[:, :, missed_strike] = MISSED_VALUE

    cube_norm_missed = data_norm_test[date_idx].copy()
    cube_norm_missed[:, :, missed_strike] = MISSED_VALUE

    # Complete the missed data in the vol cube: search for the latent z for the
    # partially observed (normalized) cube.
    z_optimal = find_z_to_complete_vol_cube(vae=vae,
                                            data=cube_norm_missed,
                                            random_attempt_num=30,  # number of attempts starting from different random z initial values
                                            random_seed=0,
                                            print_status=False)

    # Decode z and bring the reconstruction back to the original scale.
    predictions = normalizer.denormalize(vae.decoder.predict(np.array([z_optimal]), verbose=0))  # shape=(1,6,5,7)

    # Check errors against real data: score only entries that are marked as
    # missed AND belong to a scored strike (the strike mask broadcasts over the
    # last axis).
    to_score = (cube_missed == MISSED_VALUE) & scored_strike[:predictions.shape[3]]
    squared_error = (predictions[0] - cube_true) ** 2
    mse += squared_error[to_score].sum()
    n += int(to_score.sum())

# (RMSE, number of scored points); NaN instead of a ZeroDivisionError when
# nothing was scored.
((mse / n) ** 0.5 if n else float('nan')), n

0 out of 266
1 out of 266
2 out of 266
3 out of 266
4 out of 266
5 out of 266
6 out of 266
7 out of 266
8 out of 266
9 out of 266
10 out of 266
11 out of 266
12 out of 266
13 out of 266
14 out of 266
15 out of 266
16 out of 266
17 out of 266
18 out of 266
19 out of 266
20 out of 266
21 out of 266
22 out of 266
23 out of 266
24 out of 266
25 out of 266
26 out of 266
27 out of 266
28 out of 266
29 out of 266
30 out of 266
31 out of 266
32 out of 266
33 out of 266
34 out of 266
35 out of 266
36 out of 266
37 out of 266
38 out of 266
39 out of 266
40 out of 266
41 out of 266
42 out of 266
43 out of 266
44 out of 266
45 out of 266
46 out of 266
47 out of 266
48 out of 266
49 out of 266
50 out of 266
51 out of 266
52 out of 266
53 out of 266
54 out of 266
55 out of 266
56 out of 266
57 out of 266
58 out of 266
59 out of 266
60 out of 266
61 out of 266
62 out of 266
63 out of 266
64 out of 266
65 out of 266
66 out of 266
67 out of 266
68 out of 266
69 out of 266
70 out of 266
71 out of 266
72

(0.7289213638447378, 7980)

### Suppose all values except the ATM vols are missing for the date with the largest vols

##### Create the dataset with missing values

In [5]:
# Settings and imports for the experiment in which several strikes are blanked out.
import numpy as np

# MISSED_VALUE is the value that is considered as a missed value.
from references.global_parameters import MISSED_VALUE
from src.visualization.vol.find_z_to_complete_vol_cube import find_z_to_complete_vol_cube

# Indices of the strikes to blank out (everything except index 3):
# 0 is ATM-100bp, 3 is ATM, 6 is ATM+100bp.
missed_strikes = [
    0, 1, 2,
    4, 5, 6,
]

# Strike labels taken into account; the error loop in the next cell only
# scores strikes whose label appears in this list.
strikes = [
    'ATM-100bp',
    'ATM-50bp',
    'ATM',
    'ATM+50bp',
    'ATM+100bp',
]

##### Calculate the RMSE over all test dates when we complete the dataset (takes 6 hours)

In [6]:
# Accumulators over all test dates.
# NOTE: despite its name, `mse` holds the running SUM of squared errors; it is
# divided by `n` and square-rooted at the end, so the reported number is an RMSE.
mse = 0
n = 0

# True for every strike whose label is listed in `strikes`; only those strikes
# contribute to the error. Loop-invariant, so it is computed once.
scored_strike = np.array([stk in strikes for stk in uniq_strikes], dtype=bool)

for date_idx in range(len(dates_test)):
    print(f'{date_idx} out of {len(dates_test)}')

    # Blank out every strike in `missed_strikes` for the current date only.
    # Copying just this date's cube avoids duplicating the whole test set on
    # every iteration; the list index masks all chosen strikes in one assignment.
    cube_true = data_test[date_idx]
    cube_missed = cube_true.copy()
    cube_missed[:, :, missed_strikes] = MISSED_VALUE

    cube_norm_missed = data_norm_test[date_idx].copy()
    cube_norm_missed[:, :, missed_strikes] = MISSED_VALUE

    # Complete the missed data in the vol cube: search for the latent z for the
    # partially observed (normalized) cube.
    z_optimal = find_z_to_complete_vol_cube(vae=vae,
                                            data=cube_norm_missed,
                                            random_attempt_num=30,  # number of attempts starting from different random z initial values
                                            random_seed=0,
                                            print_status=False)

    # Decode z and bring the reconstruction back to the original scale.
    predictions = normalizer.denormalize(vae.decoder.predict(np.array([z_optimal]), verbose=0))  # shape=(1,6,5,7)

    # Check errors against real data: score only entries that are marked as
    # missed AND belong to a scored strike (the strike mask broadcasts over the
    # last axis).
    to_score = (cube_missed == MISSED_VALUE) & scored_strike[:predictions.shape[3]]
    squared_error = (predictions[0] - cube_true) ** 2
    mse += squared_error[to_score].sum()
    n += int(to_score.sum())

# (RMSE, number of scored points); NaN instead of a ZeroDivisionError when
# nothing was scored.
((mse / n) ** 0.5 if n else float('nan')), n

0 out of 266
1 out of 266
2 out of 266
3 out of 266
4 out of 266
5 out of 266
6 out of 266
7 out of 266
8 out of 266
9 out of 266
10 out of 266
11 out of 266
12 out of 266
13 out of 266
14 out of 266
15 out of 266
16 out of 266
17 out of 266
18 out of 266
19 out of 266
20 out of 266
21 out of 266
22 out of 266
23 out of 266
24 out of 266
25 out of 266
26 out of 266
27 out of 266
28 out of 266
29 out of 266
30 out of 266
31 out of 266
32 out of 266
33 out of 266
34 out of 266
35 out of 266
36 out of 266
37 out of 266
38 out of 266
39 out of 266
40 out of 266
41 out of 266
42 out of 266
43 out of 266
44 out of 266
45 out of 266
46 out of 266
47 out of 266
48 out of 266
49 out of 266
50 out of 266
51 out of 266
52 out of 266
53 out of 266
54 out of 266
55 out of 266
56 out of 266
57 out of 266
58 out of 266
59 out of 266
60 out of 266
61 out of 266
62 out of 266
63 out of 266
64 out of 266
65 out of 266
66 out of 266
67 out of 266
68 out of 266
69 out of 266
70 out of 266
71 out of 266
72

(2.1914714202282726, 31920)