# Optiver Realized Volatility Prediction

## Importing libraries

In [None]:
pip install tsfresh

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tsfresh
  Downloading tsfresh-0.19.0-py2.py3-none-any.whl (97 kB)
[K     |████████████████████████████████| 97 kB 4.6 MB/s 
Collecting stumpy>=1.7.2
  Downloading stumpy-1.11.1-py3-none-any.whl (136 kB)
[K     |████████████████████████████████| 136 kB 61.6 MB/s 
Collecting statsmodels>=0.13
  Downloading statsmodels-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[K     |████████████████████████████████| 9.8 MB 64.9 MB/s 
Collecting matrixprofile<2.0.0,>=1.1.10
  Downloading matrixprofile-1.1.10-cp37-cp37m-manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 52.4 MB/s 
Collecting protobuf==3.11.2
  Downloading protobuf-3.11.2-cp37-cp37m-manylinux1_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 56.6 MB/s 
Installing collected packages: protobuf, stumpy, statsmodels, matrixprofile, tsfresh
  Attemp

In [None]:
import numpy as np 
import pandas as pd 
import os
import gc
import glob

from itertools import islice
#import librosa
#from tsfresh.feature_extraction import feature_calculators
from multiprocessing import Pool
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow as tf
import keras.backend as K
from tensorflow.keras.layers import Dense, Lambda, Dot, Activation, Concatenate
from tensorflow.keras.layers import Layer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score

from tqdm.auto import tqdm
tqdm.pandas()
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Global plotting configuration and warning suppression.

plt.style.use('ggplot')
orange_black = [
    '#fdc029', '#df861d', '#FF6347', '#aa3d01', '#a30e15', '#800000', '#171820'
]
# All rcParams overrides are applied in one call.
plt.rcParams.update({
    'figure.figsize': (16,9),
    "figure.facecolor": '#FFFACD',
    "axes.facecolor": '#FFFFE0',
    "axes.grid": True,
    "grid.color": orange_black[3],
    "grid.alpha": 0.5,
    "grid.linestyle": '--',
})


import warnings
warnings.filterwarnings("ignore")

In [None]:
# Mount Google Drive at /content/drive/ (Colab only); the data paths defined
# in this notebook point below this mount point.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


## Paths

In [None]:
# All inputs live in one Drive folder; the parquet dataset roots are derived from it.
data_dir = '/content/drive/MyDrive/Optiver_Volatility'
book_path = f'{data_dir}/book_train.parquet'
trade_path = f'{data_dir}/trade_train.parquet'
# Path where model weights are checkpointed.
CHECKPOINT = './model_checkpoint/model_01'

In [None]:
# Six consecutive (start, end) windows of width 100 covering 0..600.
bucket_win = [(start, start + 100) for start in range(0, 600, 100)]
# Columns that are not turned into numeric features.
skip_features = ['time_id', 'row_id', 'target']

In [None]:
def path_dir(path, csv):
    """Map each id in `csv` to one parquet file inside `{path}/stock_id=<id>`.

    Args:
        path: Root directory of the partitioned parquet dataset.
        csv: Iterable of stock ids (despite the name, not a CSV file).

    Returns:
        dict of id -> path of a parquet file found in that id's directory.
        Ids whose directory contains no `*.parquet` file are left out.
    """
    path_dic = {}
    for i in tqdm(csv):
        parquet_path = f'{path}/stock_id={i}'
        # glob returns matches in arbitrary order; sorting makes the selected
        # file deterministic when a directory holds more than one part file.
        path_list = sorted(glob.glob(os.path.join(parquet_path, '*.parquet')))

        if path_list:
            path_dic[i] = path_list[0]

    return path_dic

In [None]:
train_dic = pd.read_csv(os.path.join(data_dir, 'train.csv'))

In [None]:
print(f'Train dataset shape: {train_dic.shape}')

Train dataset shape: (428932, 3)


In [None]:
# Row key of the form "<stock_id>-<time_id>".
train_dic['row_id'] = train_dic['stock_id'].astype(str).str.cat(train_dic['time_id'].astype(str), sep='-')

In [None]:
# Resolve one parquet file per stock id, first for the book data and then for the trade data.
book_train_dic, trade_train_dic = (
    path_dir(root, train_dic['stock_id'].unique()) for root in (book_path, trade_path)
)

  0%|          | 0/112 [00:00<?, ?it/s]

  0%|          | 0/112 [00:00<?, ?it/s]

## Feature Engineering

In [None]:
def wap1(df):
    """Level-1 weighted average price: each side's price weighted by the opposite side's size."""
    bid_size, ask_size = df['bid_size1'], df['ask_size1']
    cross_weighted = df['bid_price1'] * ask_size + df['ask_price1'] * bid_size
    return cross_weighted / (bid_size + ask_size)

def wap2(df):
    """Level-2 weighted average price: each side's price weighted by the opposite side's size."""
    bid_size, ask_size = df['bid_size2'], df['ask_size2']
    cross_weighted = df['bid_price2'] * ask_size + df['ask_price2'] * bid_size
    return cross_weighted / (bid_size + ask_size)

def wap3(df):
    """Level-1 average price with each side's price weighted by its own size."""
    bid_size, ask_size = df['bid_size1'], df['ask_size1']
    own_weighted = df['bid_price1'] * bid_size + df['ask_price1'] * ask_size
    return own_weighted / (bid_size + ask_size)

def wap4(df):
    """Level-2 average price with each side's price weighted by its own size."""
    bid_size, ask_size = df['bid_size2'], df['ask_size2']
    own_weighted = df['bid_price2'] * bid_size + df['ask_price2'] * ask_size
    return own_weighted / (bid_size + ask_size)

def roll_mean(df, col, window_size):
    """Trailing moving average of `df[col]` over `window_size` rows."""
    trailing = df[col].rolling(window=window_size)
    return trailing.mean()

def roll_std(df, col, window_size):
    """Trailing moving standard deviation of `df[col]` over `window_size` rows."""
    trailing = df[col].rolling(window=window_size)
    return trailing.std()

def roll_memw(df, col, span=10):
    """Exponentially weighted moving average of `df[col]`.

    Args:
        df: DataFrame holding the column.
        col: Name of the column to smooth.
        span: Span of the exponential window; defaults to 10, the value that
            was previously hard-coded.

    Returns:
        Series of the exponentially weighted means.
    """
    rolling_memw = df[col].ewm(span=span).mean()
    return rolling_memw

def log(series):
    """Log returns: first difference of the natural log of `series` (first value is NaN)."""
    log_values = np.log(series)
    return log_values.diff()

def realized_volatility(series):
    """Square root of the sum of squared values of `series`."""
    squared = series**2
    total = np.sum(squared)
    return np.sqrt(total)

def count_unique(series):
    """Number of distinct values in `series`."""
    distinct = np.unique(series)
    return len(distinct)

## Book Train parquet Feature Engineering

In [None]:
def book_feature(book_data):
    """Add order-book features to `book_data` in place and return it.

    New columns, in creation order: wap1, wap2, log1, log2, wap_balance,
    price_spread, price_spread2, bid_spread, ask_spread, bid_ask_spread,
    total_volume, volume_imbalance, wap1_ma, wap2_ma.

    Args:
        book_data: DataFrame with `time_id` and the two-level bid/ask price
            and size columns (`bid_price1`, `ask_size2`, ...).

    Returns:
        The same DataFrame object with the feature columns added.
    """
    # Weighted average prices and their log returns within each time_id.
    book_data['wap1'] = wap1(book_data)
    book_data['wap2'] = wap2(book_data)
    # transform() returns a result aligned with the original row index, so it
    # can be assigned as a column. groupby().apply() may prepend the group key
    # to the index on newer pandas releases, which breaks the assignment.
    by_time = book_data.groupby(['time_id'])
    book_data['log1'] = by_time['wap1'].transform(log)
    book_data['log2'] = by_time['wap2'].transform(log)

    # Absolute gap between the two weighted average prices.
    book_data['wap_balance'] = abs(book_data['wap1'] - book_data['wap2'])

    # Ask/bid spread at each level, relative to the mid price.
    book_data['price_spread'] = (book_data['ask_price1'] - book_data['bid_price1']) / ((book_data['ask_price1'] + book_data['bid_price1']) / 2)
    book_data['price_spread2'] = (book_data['ask_price2'] - book_data['bid_price2']) / ((book_data['ask_price2'] + book_data['bid_price2']) / 2)

    # Distance between level 1 and level 2 on each side of the book.
    book_data['bid_spread'] = book_data['bid_price1'] - book_data['bid_price2']
    book_data['ask_spread'] = book_data['ask_price1'] - book_data['ask_price2']
    book_data["bid_ask_spread"] = abs(book_data['bid_spread'] - book_data['ask_spread'])

    # Total resting size and the absolute ask/bid size imbalance.
    book_data['total_volume'] = (book_data['ask_size1'] + book_data['ask_size2']) + (book_data['bid_size1'] + book_data['bid_size2'])
    book_data['volume_imbalance'] = abs((book_data['ask_size1'] + book_data['ask_size2']) - (book_data['bid_size1'] + book_data['bid_size2']))

    # Weighted average prices rebuilt from 10-row moving averages of the inputs.
    # NOTE(review): the rolling windows run over the whole frame, so the first
    # rows of a time_id average in rows of the preceding one - confirm intent.
    window = 10
    ma = {
        col: roll_mean(book_data, col, window)
        for col in ('bid_price1', 'ask_size1', 'ask_price1', 'bid_size1',
                    'bid_price2', 'ask_size2', 'ask_price2', 'bid_size2')
    }

    book_data['wap1_ma'] = (ma['bid_price1'] * ma['ask_size1'] + ma['ask_price1'] * ma['bid_size1']) / (ma['bid_size1'] + ma['ask_size1'])
    book_data['wap2_ma'] = (ma['bid_price2'] * ma['ask_size2'] + ma['ask_price2'] * ma['bid_size2']) / (ma['bid_size2'] + ma['ask_size2'])

    return book_data

## Trade Train parquet Feature Engineering

In [None]:
def trade_features(trade_data):
    """Add `log_return` and `amount` columns to `trade_data` in place and return it.

    Args:
        trade_data: DataFrame with `time_id`, `price` and `size` columns.

    Returns:
        The same DataFrame object with the two columns added.
    """
    # transform() returns a result aligned with the original row index, so it
    # can be assigned as a column. groupby().apply() may prepend the group key
    # to the index on newer pandas releases, which breaks the assignment.
    trade_data['log_return'] = trade_data.groupby('time_id')['price'].transform(log)
    # Traded value of each row.
    trade_data['amount'] = trade_data['price'] * trade_data['size']
    return trade_data

In [None]:
# ''' MFCC coefficients contain information about the rate changes in the different spectrum bands '''
# def mfcc_coeff(feature):
#     mfcc_coeff = np.zeros((1, feature.shape[1]))
#     for i in range(feature.shape[1]):
#         mfcc = librosa.feature.mfcc(feature[:, i])
#         mfcc_mean = mfcc.mean(axis=1)
#         mfcc_coeff[:, i] = mfcc_mean[1]
#     return mfcc_coeff

In [None]:
# def peaks(a):
#     peaks = np.zeros((1, a.shape[1]))
#     for i in range(a.shape[1]):
#         peaks[:, i] = feature_calculators.number_peaks(a[:, i], 2)
#     return peaks

## Computing Statistics of Data

In [None]:
bucket_size = [(0,   100), (100, 200), (200, 300), (300, 400), (400, 500), (500, 600)]
def seq_statistics(series, seconds, buckets=None):
    """Summarise every column of `series` inside each time bucket.

    Args:
        series: 2-D array, one row per observation and one column per feature.
        seconds: 1-D array with one value per row of `series`; a row belongs
            to bucket (start, end) when start <= seconds < end.
        buckets: Optional list of (start, end) pairs. Defaults to the
            module-level `bucket_size`.

    Returns:
        Array of shape (n_columns, 6 * n_buckets). For each bucket, in order,
        the six values are min, max, mean, std, median and sum. Buckets with
        no rows contribute zeros, and NaN/inf are replaced by `np.nan_to_num`.
    """
    if buckets is None:
        buckets = bucket_size

    reducers = (np.min, np.max, np.mean, np.std, np.median, np.sum)
    n_cols = series.shape[1]

    seq_stat = []
    for start, end in buckets:
        rows = np.flatnonzero(np.logical_and(seconds >= start, seconds < end))
        # Slice once per bucket instead of once per statistic.
        window = series[rows]

        if window.shape[0] > 0:
            stats = [reduce(window, axis=0, keepdims=True) for reduce in reducers]
        else:
            stats = [np.zeros((1, n_cols))] * len(reducers)

        seq_stat.append(np.concatenate(stats, axis=0))

    return np.nan_to_num(np.concatenate(seq_stat, axis=0).transpose())

## Optiver Dataset Preprocessing

In [None]:
def optiver_preprocessing(ds, path_dic, fe_func, skip_features, train_flg=True):
    """Build per-time_id statistic tensors (and targets) for every stock.

    Args:
        ds: DataFrame with `stock_id` and `time_id` columns, plus `target`
            when `train_flg` is true.
        path_dic: dict of stock_id -> parquet file path.
        fe_func: Feature function applied to the merged frame; must return
            a DataFrame.
        skip_features: Column names excluded from the numeric feature matrix.
        train_flg: When true, targets are extracted as well.

    Returns:
        (X, Y): two lists with one entry per stock. Each X entry is a float16
        array laid out as (time_id group, statistic, feature). Each Y entry is
        the list of per-group targets, or an empty list when `train_flg` is
        false.
    """
    X = []
    Y = []

    for stock_id, stock_path in tqdm(path_dic.items()):

        optiver_ds = pd.read_parquet(stock_path)
        optiver_ds['row_id'] = str(stock_id) + '-' + optiver_ds['time_id'].astype(str)

        sds = ds[ds['stock_id'] == stock_id]

        cols = ['time_id', 'target'] if train_flg else ['time_id']

        merge_ds = pd.merge(sds[cols], optiver_ds, on='time_id', how='left')
        merge_ds = fe_func(merge_ds).fillna(0)

        cols = [c for c in merge_ds.columns if c not in skip_features]

        # NOTE(review): float16 is applied before the statistics are computed,
        # which limits their precision and range - confirm this is acceptable.
        np_ds = merge_ds[cols].to_numpy(dtype=np.float16)
        seconds_in_bucket = merge_ds['seconds_in_bucket'].to_numpy()
        g_idx = merge_ds[['time_id']].to_numpy()

        # np.unique orders its first-occurrence indices by time_id value, not by
        # row position. np.split needs ascending positions, so sort them before
        # dropping the leading 0.
        first_rows = np.sort(np.unique(g_idx, return_index=True)[1])
        l = first_rows[1:]
        a_list = np.split(np_ds, l)
        s_list = np.split(seconds_in_bucket, l)

        stat = list(map(seq_statistics, a_list, s_list))
        # dstack gives (feature, statistic, group); reorder to (group, statistic, feature).
        b = np.transpose(np.dstack(stat), (2, 1, 0))
        b = b.astype(np.float16)

        r = []
        if train_flg:
            targets = merge_ds[['target']].to_numpy(dtype=np.float16)
            t_list = np.split(targets, l)
            # The target is taken from the first row of each group.
            r = [t[0][0] for t in t_list]

        X.append(b)
        Y.append(r)
    return X, Y

## Preprocessing of Chunks

In [None]:
def chunks(data, SIZE=10000):
    """Yield consecutive sub-dicts of `data`, each holding at most `SIZE` items."""
    key_iter = iter(data)
    for _ in range(0, len(data), SIZE):
        batch_keys = islice(key_iter, SIZE)
        yield {key: data[key] for key in batch_keys}
        
def book_train_preprocessing(chunk_ds):
    """Run `optiver_preprocessing` with `book_feature` on one chunk of stock paths."""
    return optiver_preprocessing(
        ds=train_dic,
        path_dic=chunk_ds,
        fe_func=book_feature,
        skip_features=skip_features,
    )
def trade_train_preprocessing(chunk_ds):
    """Run `optiver_preprocessing` with `trade_features` on one chunk of stock paths."""
    return optiver_preprocessing(
        ds=train_dic,
        path_dic=chunk_ds,
        fe_func=trade_features,
        skip_features=skip_features,
    )

In [None]:
n_thread = 4
# Ceiling division with a minimum of 1: at most `n_thread` chunks are produced,
# and the chunk size is never 0 (a zero step makes `chunks` fail) when there
# are fewer stocks than workers.
book_train_chunks = list(chunks(book_train_dic, max(1, -(-len(book_train_dic) // n_thread))))
trade_train_chunks = list(chunks(trade_train_dic, max(1, -(-len(trade_train_dic) // n_thread))))

## MultiProcessing

In [None]:
%%time
pool = Pool(n_thread)
thread = pool.map(book_train_preprocessing, book_train_chunks)
pool.close()

a1, a2 = zip(*thread)
X_books = [np.concatenate(a1[i], axis=0) for i in range(len(a1))]
X_books = np.concatenate(X_books, axis=0)

targets = [np.concatenate(a2[i], axis=0) for i in range(len(a2))]
targets = np.concatenate(targets, axis=0)

Process ForkPoolWorker-2:
Process ForkPoolWorker-1:
Process ForkPoolWorker-3:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/usr/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Process ForkPoolWorker-4:
  File "/usr/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "/usr/lib/python3.7/multiproces

KeyboardInterrupt: ignored

In [None]:
%%time 
pool = Pool(n_thread)
trade_thread = pool.map(trade_train_preprocessing, trade_train_chunks)
pool.close()

a1, _ = zip(*trade_thread)
X_trades = [np.concatenate(a1[i], axis=0) for i in range(len(a1))]
X_trades = np.concatenate(X_trades, axis=0)

CPU times: user 3.61 s, sys: 1.43 s, total: 5.04 s
Wall time: 9min 21s


In [None]:
print(X_books.shape, X_trades.shape, targets.shape)
train = np.concatenate((X_books, X_trades), axis=2)
print(train.shape, targets.shape)

(428932, 36, 23) (428932, 36, 6) (428932,)
(428932, 36, 29) (428932,)


In [None]:
train.shape[1]

36

## Splitting of Train, Val and Test data

In [None]:
# Split row positions rather than the data itself: first 10% of the rows go to
# the test set, then 20% of the remainder goes to the validation set.
# shuffle=False keeps the rows of each split in their original order.
# NOTE(review): random_state presumably has no effect without shuffling - confirm.
idx = np.arange(train.shape[0])
train_idx, test_idx = train_test_split(idx, shuffle=False, test_size=0.1, random_state=42)
train_idx, val_idx = train_test_split(train_idx, shuffle=False, test_size=0.2, random_state=42)

In [None]:
train_idx.shape

(308830,)

## Data Normalization

In [None]:
# Standardise each slice along axis 1 separately: one scaler per slice, fitted
# on the training rows only and then applied to all three splits in place.
transformers = []
for i in tqdm(range(train.shape[1])):
    transformer = StandardScaler() #StandardScaler is very useful!
    transformer.fit(np.nan_to_num(train[train_idx, i, :]))

    for split_idx in (train_idx, val_idx, test_idx):
        train[split_idx, i, :] = transformer.transform(np.nan_to_num(train[split_idx, i, :]))

    transformers.append(transformer) #Save Scalers for the inference stage

  0%|          | 0/36 [00:00<?, ?it/s]

In [None]:
def rmspe(y_true, y_pred):
    """Root mean squared percentage error, with errors taken relative to `y_true`."""
    relative_error = (y_true - y_pred) / y_true
    return K.sqrt(K.mean(K.square(relative_error)))

## Data Generator

In [None]:
class Data_Generator(tf.keras.utils.Sequence):
    """Keras `Sequence` that serves (features, targets) batches from in-memory arrays.

    Args:
        ds: Feature array indexed along axis 0 by sample.
        targets: Target array aligned with `ds` along axis 0.
        batch_size: Number of samples per batch.
        shape: Stored on the instance but not otherwise used by this class.
        shuffle: When true, the sample order is reshuffled at construction and
            after every epoch.
    """
    def __init__(self, ds, targets, batch_size, shape=(32,32,32), shuffle=True):
        super().__init__()
        self.batch_size = batch_size
        self.targets = targets
        self.shape = shape
        self.ds = ds
        self.ids = np.arange(ds.shape[0])
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return len(self.ids) // self.batch_size

    def __getitem__(self, index):
        """Return batch number `index` as an (x, y) tuple."""
        # `self.ids` already holds the (possibly shuffled) sample order, so the
        # slice is used directly instead of being re-indexed through `self.ids`.
        batch_ids = self.ids[index*self.batch_size:(index+1)*self.batch_size]

        x = self.ds[batch_ids]
        y = self.targets[batch_ids]

        return x, y

    def on_epoch_end(self):
        """Reset the sample order and reshuffle it when shuffling is enabled."""
        self.ids = np.arange(self.ds.shape[0])
        if self.shuffle:
            np.random.shuffle(self.ids)

In [None]:
batch_size = 256

training_generator = Data_Generator(train[train_idx, :, :], targets[train_idx], batch_size=batch_size)
# Evaluation data is served in a fixed order so that the same samples are
# scored every epoch (the generator drops the trailing partial batch).
validation_generator = Data_Generator(train[val_idx, :, :], targets[val_idx], batch_size=batch_size, shuffle=False)
test_generator = Data_Generator(train[test_idx, :, :], targets[test_idx], batch_size=batch_size, shuffle=False)

In [None]:
from keras.models import Model, Input, load_model
from keras.layers import Dense, Embedding, Masking, Flatten, Conv1D, Conv2D
from keras.layers import LSTM, Concatenate, Add, BatchNormalization, Bidirectional
from keras.layers import GlobalAveragePooling1D, Permute, GlobalMaxPool1D, MaxPooling1D
from keras.layers import Input, Dense, LSTM, CuDNNLSTM, concatenate, Activation, GRU, SimpleRNN

## LSTM Long Short Term Memory

In [None]:
def get_model_v1(input_shape, output):
  """Build and compile a three-layer stacked LSTM regressor.

  Args:
    input_shape: Shape of one sample, passed to `Input`.
    output: Number of units of the final Dense layer.

  Returns:
    The model, compiled with the `rmspe` loss and the Adam optimizer. The
    model summary is printed as a side effect.
  """
  inputs = Input(input_shape)
  x = LSTM(128, return_sequences=True)(inputs)
  x = LSTM(64, return_sequences=True)(x)
  x = LSTM(10, return_sequences=False)(x)

  # Honour the requested output width instead of a fixed single unit.
  outputs = Dense(output)(x)
  model = Model(inputs, outputs)
  model.compile(loss=rmspe, optimizer='adam')
  model.summary()

  return model

In [None]:
# Take the per-sample input shape from the data so it cannot drift out of sync
# with the feature tensor.
input_shape = tuple(train.shape[1:])
output_shape = 1
model = get_model_v1(input_shape, output_shape)
checkpoint_filepath = CHECKPOINT
# Keep only the weights of the epoch with the lowest validation loss.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True)

# Stop once val_loss has not improved for 20 consecutive epochs.
model_earlystopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 36, 29)]          0         
                                                                 
 lstm (LSTM)                 (None, 36, 128)           80896     
                                                                 
 lstm_1 (LSTM)               (None, 36, 64)            49408     
                                                                 
 lstm_2 (LSTM)               (None, 10)                3000      
                                                                 
 dense (Dense)               (None, 1)                 11        
                                                                 
Total params: 133,315
Trainable params: 133,315
Non-trainable params: 0
_________________________________________________________________


In [None]:
epochs = 50
# `fit` accepts a keras Sequence directly; `fit_generator` is deprecated.
history = model.fit(training_generator,
                    callbacks=[model_checkpoint_callback, model_earlystopping_callback],
                    epochs=epochs,
                    validation_data=validation_generator,
                    use_multiprocessing=False,
                    workers=n_thread)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50

### Training vs Validation Loss

In [None]:
import matplotlib.pyplot as plt
# Label both curves so training and validation loss can be told apart.
plt.plot(history.history["loss"], label="Training loss")
plt.plot(history.history["val_loss"], label="Validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Loss Graph")
plt.legend()
plt.show()

In [None]:
# Evaluate on the complete held-out split. Looping over `test_generator` and
# keeping the loop variables left only its final batch for evaluation.
X_test = train[test_idx, :, :]
y_test = targets[test_idx]

In [None]:
prediction = model.predict(X_test)

### Evaluation Criteria

In [None]:
# Regression metrics of the LSTM predictions against the true test targets.
lstm_mse, lstm_mae, lstm_r2, lstm_evs = (
    metric(y_test, prediction)
    for metric in (mean_squared_error, mean_absolute_error, r2_score, explained_variance_score)
)

In [None]:
print(f'LSTM MSE:{lstm_mse}')
print(f'LSTM MAE:{lstm_mae}')
print(f'LSTM r2:{lstm_r2}')
print(f'LSTM EVS:{lstm_evs}')

## Attention Layer

In [None]:
# https://github.com/philipperemy/keras-attention-mechanism
class Attention_layer(Layer):
    """Many-to-one attention block (after philipperemy/keras-attention-mechanism).

    NOTE(review): the graph is built by overriding `__call__` rather than
    `call`, and the inner layers carry fixed names, so a second instance in
    the same model would presumably clash on layer names - confirm before
    reusing this layer more than once per model.
    """

    def __init__(self, units=128, **kwargs):
        # Initialise the base layer before setting attributes on the instance.
        super().__init__(**kwargs)
        self.units = units

    def __call__(self, inputs):
        """
        Many-to-one attention mechanism for Keras.
        @param inputs: 3D tensor with shape (batch_size, time_steps, input_dim).
        @return: 2D tensor with shape (batch_size, units)
        @author: felixhao28, philipperemy.
        """
        hidden_states = inputs
        hidden_size = int(hidden_states.shape[2])
        # Inside dense layer
        #              hidden_states            dot               W            =>           score_first_part
        # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
        # W is the trainable weight matrix of attention Luong's multiplicative style score
        score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states)
        #            score_first_part           dot        last_hidden_state     => attention_weights
        # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
        h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states)
        score = Dot(axes=[1, 2], name='attention_score')([h_t, score_first_part])
        attention_weights = Activation('softmax', name='attention_weight')(score)
        # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
        context_vector = Dot(axes=[1, 1], name='context_vector')([hidden_states, attention_weights])
        pre_activation = Concatenate(name='attention_output')([context_vector, h_t])
        attention_vector = Dense(self.units, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
        return attention_vector

    def get_config(self):
        """Return the base layer config extended with `units`."""
        config = super().get_config()
        config.update({'units': self.units})
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from a config produced by `get_config`."""
        return cls(**config)

## Attention LSTM + GRU Units

In [None]:
def generate_lstm(input_shape, NB_CLASS, NUM_CELLS=64):
    """Build a two-branch model: LSTM + attention next to a stacked GRU.

    Both branches read the same input. Their outputs are concatenated and fed
    to a linear Dense layer with `NB_CLASS` units. The model is returned
    uncompiled.
    """
    layers = tf.keras.layers
    ip = Input(shape=input_shape)

    # Attention branch: an LSTM sequence reduced by the attention layer.
    lstm_seq = LSTM(64, activation='relu', return_sequences=True)(ip)
    attn_branch = Attention_layer(NUM_CELLS)(lstm_seq)

    # Recurrent branch: three GRUs, the last one returning a single vector.
    gru_branch = layers.GRU(NUM_CELLS, activation='relu', return_sequences=True)(ip)
    gru_branch = layers.GRU(128, return_sequences=True)(gru_branch)
    gru_branch = layers.Activation('relu')(gru_branch)
    gru_branch = layers.Dropout(0.25)(gru_branch)
    gru_branch = layers.GRU(10, return_sequences=False)(gru_branch)
    gru_branch = layers.Activation('relu')(gru_branch)

    merged = layers.concatenate([gru_branch, attn_branch])
    out = Dense(NB_CLASS)(merged)

    return Model(ip, out)

In [None]:
input_shape = (36, 29)
output_shape = 1
model_attnlstm = generate_lstm(input_shape, output_shape)
model_attnlstm.summary()
# The model is trained with Adam. The SGD optimizer that used to be created
# here was never passed to compile() and relied on the deprecated `lr` and
# `decay` arguments, so it has been dropped.
model_attnlstm.compile(optimizer='adam', loss=rmspe)

In [None]:
epochs = 50
# This model gets its own callbacks. Reusing the first model's ModelCheckpoint
# would overwrite that model's checkpoint file.
# NOTE(review): a reused ModelCheckpoint presumably also keeps its previous
# best val_loss - confirm.
attn_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=CHECKPOINT + '_attnlstm',
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True)
attn_earlystopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# `fit` accepts a keras Sequence directly; `fit_generator` is deprecated.
history = model_attnlstm.fit(training_generator,
                             callbacks=[attn_checkpoint_callback, attn_earlystopping_callback],
                             epochs=epochs,
                             validation_data=validation_generator,
                             use_multiprocessing=False,
                             workers=n_thread)

## Training vs Validation Loss

In [None]:
import matplotlib.pyplot as plt
# Label both curves so training and validation loss can be told apart.
plt.plot(history.history["loss"], label="Training loss")
plt.plot(history.history["val_loss"], label="Validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Loss Graph")
plt.legend()
plt.show()

In [None]:
attn_prediction = model_attnlstm.predict(X_test)

## Evaluation Criteria

In [None]:
# scikit-learn metrics and `rmspe` take (y_true, y_pred). r2, explained
# variance and RMSPE are not symmetric, so the order matters.
# Flatten the (N, 1) predictions so they line up with the (N,) targets. Without
# this, the subtraction inside `rmspe` broadcasts to an (N, N) matrix.
attn_pred_flat = np.ravel(attn_prediction)
attn_lstm_mse = mean_squared_error(y_test, attn_pred_flat)
attn_lstm_mae = mean_absolute_error(y_test, attn_pred_flat)
attn_lstm_r2 = r2_score(y_test, attn_pred_flat)
attn_lstm_evs = explained_variance_score(y_test, attn_pred_flat)
# Convert the scalar tensor to a plain float for printing.
attn_lstm_rmspe = float(rmspe(y_test, attn_pred_flat))

In [None]:
print(f'ATTNLSTM MSE:{attn_lstm_mse}')
print(f'ATTNLSTM MAE:{attn_lstm_mae}')
print(f'ATTNLSTM r2:{attn_lstm_r2}')
print(f'ATTNLSTM EVS:{attn_lstm_evs}')
print(f'ATTNLSTM RMSPE:{attn_lstm_rmspe}')