In [1]:
import pandas as pd
import numpy as np
from arch import arch_model
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, GRU, Conv1D, Flatten,Input, MultiHeadAttention, LayerNormalization, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.models import Model
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

In [4]:
real_prices_train=np.load('real_train_data_array.npy')
real_prices_test=np.load('real_test_data_array.npy')
synthetic_prices_vae=np.load('synthetic_data_array_vae.npy')
synthetic_prices_gan=np.load('synthetic_data_array_gan.npy')

In [5]:
real_returns_train=np.diff(real_prices_train, axis=1) / real_prices_train[:, :-1, :]
real_returns_test=np.diff(real_prices_test, axis=1) / real_prices_test[:, :-1, :]
synthetic_returns_vae=np.diff(synthetic_prices_vae, axis=1) / synthetic_prices_vae[:, :-1, :]
synthetic_returns_gan=np.diff(synthetic_prices_gan, axis=1) / synthetic_prices_gan[:, :-1, :]

In [6]:
def calculate_rolling_volatility(returns, window_size):
    """
    Calculate rolling volatility for a 3D array of returns.
    
    Parameters:
    returns (numpy.ndarray): 3D array with dimensions (samples, periods, stocks) containing the returns.
    window_size (int): The window size for calculating rolling volatility.
    
    Returns:
    numpy.ndarray: 3D array with dimensions (samples, periods - window_size + 1, stocks) containing the rolling volatility.
    """
    num_samples, num_periods, num_stocks = returns.shape
    result_periods = num_periods - window_size + 1
    rolling_volatility = np.zeros((num_samples, result_periods, num_stocks))
    
    for i in range(num_stocks):
        for j in range(num_samples):
            df = pd.Series(returns[j, :, i])
            rolling_vol = df.rolling(window=window_size).std().values
            # Fill NaNs with forward fill method
            rolling_vol = pd.Series(rolling_vol).fillna(method='ffill').fillna(method='bfill').values
            # Ensure correct slicing to avoid shape mismatch
            rolling_volatility[j, :, i] = rolling_vol[window_size - 1:]
    
    return rolling_volatility


In [7]:
window_size=5
real_vol_train = calculate_rolling_volatility(real_returns_train, window_size)
real_vol_test = calculate_rolling_volatility(real_returns_test, window_size)
synthetic_vol_vae = calculate_rolling_volatility(synthetic_returns_vae, window_size)
synthetic_vol_gan = calculate_rolling_volatility(synthetic_returns_gan, window_size)

  rolling_vol = pd.Series(rolling_vol).fillna(method='ffill').fillna(method='bfill').values


In [8]:
seq_len = real_vol_train.shape[1]
n_seq = real_vol_test.shape[2]

In [112]:
seq_len

19

In [69]:
scaler = MinMaxScaler()
scaler_vae = MinMaxScaler()
scaler_gan = MinMaxScaler()
scaled_real_vol_train = scaler.fit_transform(real_vol_train.reshape(-1, 20)).reshape(-1, seq_len, 20)
scaled_real_vol_test = scaler.transform(real_vol_test.reshape(-1, 20)).reshape(-1, seq_len, 20)
scaled_synthetic_vol_vae = scaler_vae.fit_transform(synthetic_vol_vae.reshape(-1, 20)).reshape(-1, seq_len, 20)
scaled_synthetic_vol_gan = scaler_gan.fit_transform(synthetic_vol_gan.reshape(-1, 20)).reshape(-1, seq_len, 20)


In [70]:
real_train_data = real_vol_train[:, :seq_len-1, :]
real_train_label = real_vol_train[:, -1, :]

scaled_real_train_data = scaled_real_vol_train[:, :seq_len-1, :]
scaled_real_train_label = scaled_real_vol_train[:, -1, :]

real_test_data = real_vol_test[:, :seq_len-1, :]
real_test_label = real_vol_test[:, -1, :]
garch_data=real_returns_test[:, -6:-1, :]
scaled_real_test_data = scaled_real_vol_test[:, :seq_len-1, :]
scaled_real_test_label = scaled_real_vol_test[:, -1, :]

synthetic_train_vae = scaled_synthetic_vol_vae[:, :seq_len-1, :]
synthetic_label_vae = scaled_synthetic_vol_vae[:, -1, :]
synthetic_train_gan = scaled_synthetic_vol_gan[:, :seq_len-1, :]
synthetic_label_gan = scaled_synthetic_vol_gan[:, -1, :]

In [71]:
synthetic_train2_vae=np.concatenate((synthetic_train_vae, scaled_real_train_data), axis=0)
synthetic_label2_vae=np.concatenate((synthetic_label_vae, scaled_real_train_label), axis=0)
synthetic_train2_gan=np.concatenate((synthetic_train_gan, scaled_real_train_data), axis=0)
synthetic_label2_gan=np.concatenate((synthetic_label_gan, scaled_real_train_label), axis=0)

In [72]:
# Get the number of samples
num_samples = synthetic_train2_vae.shape[0]

# Generate a permutation of indices
indices = np.random.permutation(num_samples)

# Shuffle the data and labels
shuffled_synthetic_train2_vae = synthetic_train2_vae[indices]
shuffled_synthetic_label2_vae = synthetic_label2_vae[indices]

In [73]:
# Get the number of samples
num_samples = synthetic_train2_gan.shape[0]

# Generate a permutation of indices
indices = np.random.permutation(num_samples)

# Shuffle the data and labels
shuffled_synthetic_train2_gan = synthetic_train2_gan[indices]
shuffled_synthetic_label2_gan = synthetic_label2_gan[indices]

# Conv1D + LSTM

## Real Data

In [29]:
# Define the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(18, 20)))
model.add(LSTM(64, return_sequences=False))  # Return sequences should be False for the final prediction step
model.add(Dense(20))

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(scaled_real_train_data, scaled_real_train_label, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100
[1m 92/100[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 8ms/step - loss: 0.0102
Epoch 1: val_loss improved from inf to 0.00371, saving model to best.weights.h5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 0.0099 - val_loss: 0.0037
Epoch 2/100
[1m 94/100[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 7ms/step - loss: 0.0045
Epoch 2: val_loss improved from 0.00371 to 0.00319, saving model to best.weights.h5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0045 - val_loss: 0.0032
Epoch 3/100
[1m 98/100[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0038
Epoch 3: val_loss improved from 0.00319 to 0.00283, saving model to best.weights.h5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0038 - val_loss: 0.0028
Epoch 4/100
[1m 91/100[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 8ms/step - loss: 0.0034
Epoch 4: val_los

## Synthetic VAE

In [25]:
# Define the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(18, 20)))
model.add(LSTM(64, return_sequences=False))  # Return sequences should be False for the final prediction step
model.add(Dense(20))

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(synthetic_train_vae, synthetic_label_vae, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m119/125[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0060
Epoch 1: val_loss improved from inf to 0.00231, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.0059 - val_loss: 0.0023
Epoch 2/100
[1m120/125[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0021
Epoch 2: val_loss improved from 0.00231 to 0.00206, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0021 - val_loss: 0.0021
Epoch 3/100
[1m117/125[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 7ms/step - loss: 0.0019
Epoch 3: val_loss improved from 0.00206 to 0.00177, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0019 - val_loss: 0.0018
Epoch 4/100
[1m121/125[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - loss: 0.0016
Epoch 4: val_loss improved f

## Real + Synthetic VAE

In [87]:
# Define the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(18, 20)))
model.add(LSTM(64, return_sequences=False))  # Return sequences should be False for the final prediction step
model.add(Dense(20))

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(synthetic_train2_vae, synthetic_label2_vae, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0064
Epoch 1: val_loss improved from inf to 0.00350, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - loss: 0.0064 - val_loss: 0.0035
Epoch 2/100
[1m223/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0031
Epoch 2: val_loss improved from 0.00350 to 0.00291, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.0031 - val_loss: 0.0029
Epoch 3/100
[1m217/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0023
Epoch 3: val_loss improved from 0.00291 to 0.00259, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.0023 - val_loss: 0.0026
Epoch 4/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0020
Epoch 4: val_loss improved f

In [88]:
prediction_lstm_vae=model.predict(scaled_real_test_data)
prediction_lstm_vae=scaler.inverse_transform(prediction_lstm_vae)

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


## Synthetic GAN

In [38]:
# Define the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(18, 20)))
model.add(LSTM(64, return_sequences=False))  # Return sequences should be False for the final prediction step
model.add(Dense(20))

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(synthetic_train_gan, synthetic_label_gan, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0029
Epoch 1: val_loss improved from inf to 0.00149, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - loss: 0.0029 - val_loss: 0.0015
Epoch 2/100
[1m120/125[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - loss: 0.0012
Epoch 2: val_loss improved from 0.00149 to 0.00093, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0012 - val_loss: 9.2959e-04
Epoch 3/100
[1m124/125[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - loss: 8.8730e-04
Epoch 3: val_loss improved from 0.00093 to 0.00093, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 8.8826e-04 - val_loss: 9.2527e-04
Epoch 4/100
[1m122/125[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 8.4788e-04
Epoch 4:

## Real + Synthetic GAN

In [90]:
# Define the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(18, 20)))
model.add(LSTM(64, return_sequences=False))  # Return sequences should be False for the final prediction step
model.add(Dense(20))

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(synthetic_train2_gan, synthetic_label2_gan, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100
[1m222/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0050
Epoch 1: val_loss improved from inf to 0.00327, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - loss: 0.0050 - val_loss: 0.0033
Epoch 2/100
[1m218/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0022
Epoch 2: val_loss improved from 0.00327 to 0.00268, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.0022 - val_loss: 0.0027
Epoch 3/100
[1m223/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - loss: 0.0018
Epoch 3: val_loss improved from 0.00268 to 0.00241, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.0018 - val_loss: 0.0024
Epoch 4/100
[1m221/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0016
Epoch 4: val_loss

In [91]:
prediction_lstm_gan=model.predict(real_test_data)
prediction_lstm_gan=scaler.inverse_transform(prediction_lstm_gan)

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


# GRU

## Real

In [50]:
# Define the model
model = Sequential([GRU(32, input_shape=(seq_len-1, n_seq)),
                        Dense(20)])

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(scaled_real_train_data, scaled_real_train_label, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100
[1m 98/100[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0142
Epoch 1: val_loss improved from inf to 0.00417, saving model to best.weights.h5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.0141 - val_loss: 0.0042
Epoch 2/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0050
Epoch 2: val_loss improved from 0.00417 to 0.00317, saving model to best.weights.h5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0050 - val_loss: 0.0032
Epoch 3/100
[1m 98/100[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0040
Epoch 3: val_loss improved from 0.00317 to 0.00285, saving model to best.weights.h5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0040 - val_loss: 0.0028
Epoch 4/100
[1m 99/100[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0034
Epoch 4: val_loss

## Synthetic VAE

In [48]:
# Define the model
model = Sequential([GRU(32, input_shape=(seq_len-1, n_seq)),
                        Dense(20)])

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(synthetic_train_vae, synthetic_label_vae, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100


  super().__init__(**kwargs)


[1m117/125[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 6ms/step - loss: 0.0065
Epoch 1: val_loss improved from inf to 0.00268, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.0063 - val_loss: 0.0027
Epoch 2/100
[1m117/125[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 7ms/step - loss: 0.0023
Epoch 2: val_loss improved from 0.00268 to 0.00198, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0023 - val_loss: 0.0020
Epoch 3/100
[1m118/125[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 6ms/step - loss: 0.0018
Epoch 3: val_loss improved from 0.00198 to 0.00162, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0018 - val_loss: 0.0016
Epoch 4/100
[1m120/125[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0014
Epoch 4: val_loss improved fr

## Real + Synthetic VAE

In [93]:
# Define the model
model = Sequential([GRU(32, input_shape=(seq_len-1, n_seq)),
                        Dense(20)])

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(shuffled_synthetic_train2_vae, shuffled_synthetic_label2_vae, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100
[1m219/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - loss: 0.0120
Epoch 1: val_loss improved from inf to 0.00323, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.0118 - val_loss: 0.0032
Epoch 2/100
[1m218/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0031
Epoch 2: val_loss improved from 0.00323 to 0.00241, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.0031 - val_loss: 0.0024
Epoch 3/100
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0023
Epoch 3: val_loss improved from 0.00241 to 0.00197, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.0023 - val_loss: 0.0020
Epoch 4/100
[1m222/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0019
Epoch 4: val_loss

In [94]:
prediction_gru_vae=model.predict(scaled_real_test_data)
prediction_gru_vae=scaler.inverse_transform(prediction_gru_vae)

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


## Synthetic GAN

In [54]:
# Define the model
model = Sequential([GRU(32, input_shape=(seq_len-1, n_seq)),
                        Dense(20)])

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(synthetic_train_gan, synthetic_label_gan, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100
[1m123/125[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0038
Epoch 1: val_loss improved from inf to 0.00165, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.0037 - val_loss: 0.0017
Epoch 2/100
[1m123/125[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0014
Epoch 2: val_loss improved from 0.00165 to 0.00121, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0014 - val_loss: 0.0012
Epoch 3/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0011
Epoch 3: val_loss improved from 0.00121 to 0.00092, saving model to best.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0011 - val_loss: 9.2401e-04
Epoch 4/100
[1m116/125[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - loss: 9.1414e-04
Epoch 4: 

## Real + Synthetic GAN

In [95]:
# Define the model
model = Sequential([GRU(32, input_shape=(seq_len-1, n_seq)),
                        Dense(20)])

# Compile the model
model.compile(optimizer='adam', loss='mse')
# Define the ModelCheckpoint callback
checkpoint_filepath = 'best.weights.h5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=False  # Restore model weights from the epoch with the best value of the monitored quantity
)

# Fit the model
history=model.fit(shuffled_synthetic_train2_gan, shuffled_synthetic_label2_gan, epochs=100, batch_size=32, validation_split=0.2,
          callbacks=[checkpoint_callback,early_stopping_callback])

# Evaluate the model
model.load_weights(checkpoint_filepath)
loss = model.evaluate(scaled_real_test_data, scaled_real_test_label)
print(f'Test loss: {loss}')

Epoch 1/100


  super().__init__(**kwargs)


[1m224/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - loss: 0.0070
Epoch 1: val_loss improved from inf to 0.00290, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 0.0069 - val_loss: 0.0029
Epoch 2/100
[1m220/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0024
Epoch 2: val_loss improved from 0.00290 to 0.00209, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0024 - val_loss: 0.0021
Epoch 3/100
[1m218/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0019
Epoch 3: val_loss improved from 0.00209 to 0.00178, saving model to best.weights.h5
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.0019 - val_loss: 0.0018
Epoch 4/100
[1m221/225[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - loss: 0.0017
Epoch 4: val_loss improved fr

In [96]:
prediction_gru_gan=model.predict(scaled_real_test_data)
prediction_gru_gan=scaler.inverse_transform(prediction_gru_gan)

[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step


In [79]:
garch_data.shape

(978, 5, 20)

In [80]:
results_garch_1_1 = np.empty((garch_data.shape[0], garch_data.shape[2]))
results_garch_2_1 = np.empty((garch_data.shape[0], garch_data.shape[2]))
results_garch_1_2 = np.empty((garch_data.shape[0], garch_data.shape[2]))

# Loop through the data and fit GARCH model
for i in range(garch_data.shape[0]):
    for j in range(garch_data.shape[2]):
        model = arch_model(garch_data[i, :, j], vol='Garch', p=1, q=1, rescale=False)
        model_fit = model.fit(disp='off')
        forecasts = model_fit.forecast(horizon=1)
        forecasted_variance = forecasts.variance.iloc[-1, 0]
        forecasted_std_dev = np.sqrt(forecasted_variance)
        results_garch_1_1[i, j] = forecasted_std_dev

        model = arch_model(garch_data[i, :, j], vol='Garch', p=2, q=1, rescale=False)
        model_fit = model.fit(disp='off')
        forecasts = model_fit.forecast(horizon=1)
        forecasted_variance = forecasts.variance.iloc[-1, 0]
        forecasted_std_dev = np.sqrt(forecasted_variance)
        results_garch_2_1[i, j] = forecasted_std_dev

        model = arch_model(garch_data[i, :, j], vol='Garch', p=1, q=2, rescale=False)
        model_fit = model.fit(disp='off')
        forecasts = model_fit.forecast(horizon=1)
        forecasted_variance = forecasts.variance.iloc[-1, 0]
        forecasted_std_dev = np.sqrt(forecasted_variance)
        results_garch_1_2[i, j] = forecasted_std_dev

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Positive directional derivative 

In [97]:
tickers=['AAPL', 'MSFT','JNJ', 'AMZN','XOM', 'JPM', 'LQD', 'SHY','IEF', 'TLT','^GSPC', 
         '^DJI', '^IXIC', 'GC=F', 'CL=F','IWM','FCEL', 'GBPUSD=X', 'JPY=X', 'EURUSD=X']

In [98]:
result_df=pd.DataFrame(columns= ["Mean Absolute Error Garch(1,1)", "Mean Absolute Error Garch(2,1)", "Mean Absolute Error Garch(1,2)"])
for k in range(20):
    actual=real_test_label[:,k]

    predicted_lstm_vae=prediction_lstm_vae[:,k]
    predicted_lstm_gan=prediction_lstm_gan[:,k]
    predicted_gru_vae=prediction_gru_vae[:,k]
    predicted_gru_gan=prediction_gru_gan[:,k]

    predicted_garch_1_1=results_garch_1_1[:,k]
    predicted_garch_2_1=results_garch_2_1[:,k]
    predicted_garch_1_2=results_garch_1_2[:,k]

    mse_lstm_vae = mean_absolute_error(actual, predicted_lstm_vae)
    mse_lstm_gan = mean_absolute_error(actual, predicted_lstm_gan)
    mse_gru_vae = mean_absolute_error(actual, predicted_gru_vae)
    mse_gru_gan = mean_absolute_error(actual, predicted_gru_gan)

    mse_garch_1_1 = mean_absolute_error(actual, predicted_garch_1_1)
    mse_garch_2_1 = mean_absolute_error(actual, predicted_garch_2_1)
    mse_garch_1_2 = mean_absolute_error(actual, predicted_garch_1_2)

    result_df.loc[k,'Mean Absolute Error LSTM VAE']=mse_lstm_vae
    result_df.loc[k,'Mean Absolute Error LSTM GAN']=mse_lstm_gan
    result_df.loc[k,'Mean Absolute Error GRU VAE']=mse_gru_vae
    result_df.loc[k,'Mean Absolute Error GRU GAN']=mse_gru_gan

    result_df.loc[k,'Mean Absolute Error Garch(1,1)']=mse_garch_1_1
    result_df.loc[k,'Mean Absolute Error Garch(2,1)']=mse_garch_2_1
    result_df.loc[k,'Mean Absolute Error Garch(1,2)']=mse_garch_1_2

In [99]:
result_df

Unnamed: 0,"Mean Absolute Error Garch(1,1)","Mean Absolute Error Garch(2,1)","Mean Absolute Error Garch(1,2)",Mean Absolute Error LSTM VAE,Mean Absolute Error LSTM GAN,Mean Absolute Error GRU VAE,Mean Absolute Error GRU GAN
0,0.004022,0.004764,0.004169,0.003732,0.01613,0.003395,0.003417
1,0.003944,1.061441,0.004089,0.004034,0.016146,0.003504,0.003372
2,0.002431,0.003613,0.002585,0.002407,0.009363,0.002105,0.002122
3,0.004752,0.005294,0.004906,0.004871,0.018951,0.004281,0.004508
4,0.004624,0.008738,0.004778,0.004504,0.017803,0.004336,0.004251
5,0.003765,0.009451,0.003911,0.005268,0.014686,0.003695,0.004075
6,0.001204,0.001281,0.001204,0.001799,0.005333,0.00131,0.001226
7,0.015195,0.004485,0.611592,0.000252,0.000913,0.000226,0.000218
8,0.001016,0.010685,0.000944,0.000888,0.004037,0.000811,0.000889
9,0.002194,0.002519,0.002258,0.002058,0.00999,0.001876,0.001931


In [123]:
#min_values_per_row = result_df.min(axis=1)
#result_df[result_df['Mean Absolute Error NN'] == min_values_per_row].shape[0]


10

In [105]:
min_values_per_row = result_df.min(axis=1)
result_df[result_df['Mean Absolute Error GRU VAE'] == min_values_per_row].shape[0]

11

In [106]:
min_values_per_row = result_df.min(axis=1)
result_df[result_df['Mean Absolute Error GRU GAN'] == min_values_per_row].shape[0]

5

In [100]:
min_values_per_row = result_df.min(axis=1)
result_df[result_df["Mean Absolute Error Garch(1,1)"] == min_values_per_row].shape[0]

3

In [101]:
min_values_per_row = result_df.min(axis=1)
result_df[result_df["Mean Absolute Error Garch(2,1)"] == min_values_per_row].shape[0]

0

In [102]:
min_values_per_row = result_df.min(axis=1)
result_df[result_df["Mean Absolute Error Garch(1,2)"] == min_values_per_row].shape[0]

1

In [109]:
result_df.index=tickers
result_df[['Mean Absolute Error Garch(1,1)', 'Mean Absolute Error Garch(2,1)',
       'Mean Absolute Error Garch(1,2)','Mean Absolute Error GRU VAE',
       'Mean Absolute Error GRU GAN']]

Unnamed: 0,"Mean Absolute Error Garch(1,1)","Mean Absolute Error Garch(2,1)","Mean Absolute Error Garch(1,2)",Mean Absolute Error GRU VAE,Mean Absolute Error GRU GAN
AAPL,0.004022,0.004764,0.004169,0.003395,0.003417
MSFT,0.003944,1.061441,0.004089,0.003504,0.003372
JNJ,0.002431,0.003613,0.002585,0.002105,0.002122
AMZN,0.004752,0.005294,0.004906,0.004281,0.004508
XOM,0.004624,0.008738,0.004778,0.004336,0.004251
JPM,0.003765,0.009451,0.003911,0.003695,0.004075
LQD,0.001204,0.001281,0.001204,0.00131,0.001226
SHY,0.015195,0.004485,0.611592,0.000226,0.000218
IEF,0.001016,0.010685,0.000944,0.000811,0.000889
TLT,0.002194,0.002519,0.002258,0.001876,0.001931


In [110]:
result_df=pd.DataFrame(columns= ["Mean Absolute Error Garch(1,1)", "Mean Absolute Error Garch(2,1)", "Mean Absolute Error Garch(1,2)"])
for k in range(20):
    actual=real_test_label[:,k]

    predicted_lstm_vae=prediction_lstm_vae[:,k]
    predicted_lstm_gan=prediction_lstm_gan[:,k]
    predicted_gru_vae=prediction_gru_vae[:,k]
    predicted_gru_gan=prediction_gru_gan[:,k]

    predicted_garch_1_1=results_garch_1_1[:,k]
    predicted_garch_2_1=results_garch_2_1[:,k]
    predicted_garch_1_2=results_garch_1_2[:,k]

    mse_lstm_vae = mean_squared_error(actual, predicted_lstm_vae)
    mse_lstm_gan = mean_squared_error(actual, predicted_lstm_gan)
    mse_gru_vae = mean_squared_error(actual, predicted_gru_vae)
    mse_gru_gan = mean_squared_error(actual, predicted_gru_gan)

    mse_garch_1_1 = mean_squared_error(actual, predicted_garch_1_1)
    mse_garch_2_1 = mean_squared_error(actual, predicted_garch_2_1)
    mse_garch_1_2 = mean_squared_error(actual, predicted_garch_1_2)

    result_df.loc[k,'Mean Absolute Error LSTM VAE']=mse_lstm_vae
    result_df.loc[k,'Mean Absolute Error LSTM GAN']=mse_lstm_gan
    result_df.loc[k,'Mean Absolute Error GRU VAE']=mse_gru_vae
    result_df.loc[k,'Mean Absolute Error GRU GAN']=mse_gru_gan

    result_df.loc[k,'Mean Absolute Error Garch(1,1)']=mse_garch_1_1
    result_df.loc[k,'Mean Absolute Error Garch(2,1)']=mse_garch_2_1
    result_df.loc[k,'Mean Absolute Error Garch(1,2)']=mse_garch_1_2

In [111]:
result_df

Unnamed: 0,"Mean Absolute Error Garch(1,1)","Mean Absolute Error Garch(2,1)","Mean Absolute Error Garch(1,2)",Mean Absolute Error LSTM VAE,Mean Absolute Error LSTM GAN,Mean Absolute Error GRU VAE,Mean Absolute Error GRU GAN
0,4.4e-05,6.4e-05,4.7e-05,4.834332e-05,0.00038,3.514628e-05,2.953427e-05
1,4.3e-05,1092.437218,4.5e-05,7.833026e-05,0.000381,4.098685e-05,2.767448e-05
2,1.6e-05,0.000939,2e-05,1.643713e-05,0.000148,1.250943e-05,1.205642e-05
3,6.2e-05,7e-05,6.4e-05,5.864503e-05,0.000477,4.82841e-05,4.916298e-05
4,5.8e-05,0.011727,6.1e-05,5.648127e-05,0.000431,7.410638e-05,7.130809e-05
5,4.4e-05,0.027774,4.8e-05,8.936054e-05,0.000374,7.101673e-05,0.0001126925
6,5e-06,6e-06,6e-06,4.612792e-05,4.9e-05,1.176413e-05,5.859508e-06
7,0.061844,0.004361,359.467545,2.798203e-07,2e-06,1.344827e-07,1.186641e-07
8,2.3e-05,0.046038,6e-06,2.042625e-06,2.4e-05,1.468252e-06,2.228913e-06
9,1.2e-05,1.6e-05,1.3e-05,1.23017e-05,0.000136,8.049837e-06,9.205677e-06
