##### Download, normalize and split vol cube data into train/test datasets

In [None]:
import sys

# Make the project's main folder importable so the `src.*` packages resolve
# when this notebook is run from its own directory.
sys.path.append('../..')

from src.data.vol.get_vol_cube_tenors_strikes_dates import get_vol_cube_tenors_strikes_dates
from src.data.vol.normalizer import Normalizer
from src.utils.get_train_test_datasets import get_train_test_datasets

# Step 1: download the vol cube together with its axes (tenors, strikes, dates).
data, opt_tenors, swap_tenors, strikes, dates = get_vol_cube_tenors_strikes_dates()

# Step 2: normalize; the same Normalizer instance is reused below to undo it.
normalizer = Normalizer()
data_norm = normalizer.normalize(data)

# Step 3: split the normalized data (and the matching dates) into train/test.
# The split type is kept in a variable because it is also used in the model name.
dataset_split_type = 'random_split'
data_norm_train, dates_train, data_norm_test, dates_test = get_train_test_datasets(
    data_norm,
    dates,
    seed=0,
    train_ratio=0.8,
    type=dataset_split_type,
)

# Step 4: keep denormalized versions of both subsets as well.
data_train, data_test = (
    normalizer.denormalize(data_norm_train),
    normalizer.denormalize(data_norm_test),
)

##### Create and train the model (it takes 10 minutes to train)

In [None]:
# Training hyper-parameters.
EPOCHS = 3000
BATCH_SIZE = 16
HIDDEN_LAYERS_NODES = [200, 100, 50, 25]
LOSS_TYPE = 'mse'
# Weight of the Kullback-Leibler (KL) loss when the total loss is calculated as
# the sum of the KL loss and the reconstruction loss. Kept as a string so that
# exactly this text ends up in the model name; it is converted with float()
# where a numeric value is needed.
BETA = '1e-5'
LATENT_SPACE_DIM = 3
ACTIVATION = 'leaky_relu'   # or 'relu'

# Create the name of the model based on its characteristics:
#   vae_van_<activation>_<split type>_<latent dim>_<layer sizes...>_<epochs>ep_bat<batch>_<beta>
# Joining the parts with '_' produces the same string as appending
# "<nodes>_" once per hidden layer (including when the list is empty),
# without an unused loop index or repeated string concatenation.
NAME = '_'.join(
    ['vae_van', ACTIVATION, dataset_split_type, str(LATENT_SPACE_DIM)]
    + [str(nodes) for nodes in HIDDEN_LAYERS_NODES]
    + [str(EPOCHS) + 'ep_bat' + str(BATCH_SIZE), BETA]
)
print(NAME)


# Create VAE and fit it
from src.models.vae_vanilla import VaeVanilla

# The input shape is taken from the training data with the leading (sample)
# axis dropped.
vae = VaeVanilla(input_shape=data_norm_train.shape[1:],
                 hidden_layers_nodes=HIDDEN_LAYERS_NODES,
                 latent_space_dim=LATENT_SPACE_DIM,
                 loss_type=LOSS_TYPE,
                 beta=float(BETA),  # BETA is stored as a string for the model name
                 # Use the configured activation so the trained model always
                 # matches the activation recorded in NAME (previously this was
                 # hard-coded to 'leaky_relu' regardless of ACTIVATION).
                 activation=ACTIVATION)
vae.compile(optimizer='adam')
# NOTE(review): validation_data is passed as a bare array (no targets);
# presumably VaeVanilla's evaluation step accepts inputs only -- confirm.
history = vae.fit(data_norm_train,
                  epochs=EPOCHS,
                  verbose=0,  # no per-epoch output
                  batch_size=BATCH_SIZE,
                  validation_data=data_norm_test)

##### Save the model

In [None]:
from src.utils.save_model_and_history import save_model_and_history

# Save the fitted model together with its training history under the
# generated model name, tagged as vol data.
save_model_and_history(vae, history, NAME, data_type='vol')