# Forecast of Volatility using LSTM

In [1]:
import numpy as np
import pandas as pd
from pandas_datareader import data as pdr
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, SimpleRNN
from keras import backend as K
from keras import regularizers
import matplotlib.pyplot as plt
from IPython.display import display, Image
import sys, glob
import warnings
warnings.filterwarnings("ignore")

  from pandas.util.testing import assert_frame_equal
Using TensorFlow backend.


In [2]:
#Summary Table
csv = (glob.glob('*.csv'))[0]
print ('Filename for Summary Table: ' + csv)

#Data Preparation from CSV  
df_Summary = pd.read_csv(csv,index_col=0,engine = 'python')
n_days = 252 #Number of days to use as dataset based on average trading days per year 

#Define empty array
dataset = np.empty((n_days, 0)) # locate the array

#Create Dataset
for i in range(0, len(df_Summary)):
    try:
        ticker = df_Summary['Ticker'][i]
        df_ticker = pd.read_csv('Data//'+ ticker + '//' + ticker + '.csv',engine = 'python')

        #Create new dataset array based on 'Return_log' and 'Volatility'
        asset=df_ticker[['Ret_Log','Vol']].tail(n_days).values
        dataset = np.append(dataset, asset, axis=1)
    
    except:
        continue

Filename for Summary Table: Summary_DJIA30.csv


In [3]:
#Implementing the LSTM model

# Normalize data
factor = 2

# Calculate second raw moment
M2 = np.mean(dataset ** 2, axis=0) ** (1/2)

# Apply scaling
dataset_norm = (1/factor) * (dataset / M2)

def create_dataset(dataset, look_back=1):
    """
    Function to convert series from dataset to supervised learning problem
    """
    data_x, data_y = [], []

    for i in range(len(dataset) - look_back):

        # Create sequence of length equal to look_back
        x = dataset[i:(i + look_back), :]
        data_x.append(x)

        # Take just the volatility for the target
        data_y.append(dataset[i + look_back, 1::2])

    return np.array(data_x), np.array(data_y)

# Convert series to supervised learning model
look_back = 20
X, y = create_dataset(dataset_norm, look_back)

# Declare variables
n_features = dataset.shape[1]
n_assets = y.shape[1]


In [4]:
# Split dataset
training_days = 90
X_train, X_test = X[:training_days], X[training_days:]
y_train, y_test = y[:training_days], y[training_days:]

# Prepare the 3D input vector for the LSTM
X_train = np.reshape(X_train, (-1, look_back, n_features))
X_test = np.reshape(X_test, (-1, look_back, n_features))

# Interested on 1 step ahead of preductions on the testing set
batch_size = 1

# Create a sequential model in order to add layers to the model
model = Sequential()

# LSTM considers the arguments the number of units and the input shape. 
# Regularisation is to deal with the undesired overfitting
model.add(LSTM(len(dataset[0]),
               input_shape=(look_back, n_features),
               batch_size=batch_size,
               stateful=True,
               activity_regularizer=regularizers.l1_l2(),
               recurrent_regularizer=regularizers.l1_l2()))

# Setting dropout rate as 0 during training
model.add(Dropout(0.2))

# Dense layer are the number of unit and non-linear function based on sigmoid
model.add(Dense(n_assets, activation='sigmoid')) 


# Compile the LSTM model
model.compile(loss='mse', optimizer='rmsprop')

ValueError: cannot reshape array of size 0 into shape (20,0)

In [None]:
num_epochs = 20

# Fit the model
for i in range(num_epochs):

    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              shuffle=False,
              epochs=1,
              verbose=0) # Verbosity mode 0: silent

    model.reset_states()

In [None]:
# Make a prediction (rolling test window)
y_pred = np.empty((0, n_assets))

for i in range(len(X_test)):
    X_i = X_test[i].reshape(1, look_back, n_features)
    predicted_output = model.predict(X_i, batch_size=batch_size)

    # Reshape prediction to save into array
    predicted_output = predicted_output.reshape(1, n_assets)
    y_pred = np.append(y_pred, predicted_output, axis=0)

y_pred = y_pred.reshape(-1, n_assets)
y_true = y_test.reshape(-1, n_assets)

# Invert scaling
def invert_standardization(data, M2, factor):
  
    # Consider just volatility series
    M2 = M2[1::2]

    data = factor * data * M2

    return data

# Apply inversion
y_pred = invert_standardization(y_pred, M2, factor)
y_true = invert_standardization(y_true, M2, factor)


In [None]:
def evaluate(y_true, y_pred):
    """
    Function to calculate MSE and QLIKE
    """

    mse = []
    qlike = []

    for i in range(0, int(len(dataset[0])/2)):
        ticker = df_Summary['Ticker'][i]
        mse_i = (y_true[:, i] - y_pred[:, i]) ** 2
        qlike_i = np.log(y_pred[:, i]) + (y_true[:, i] /  y_pred[:, i])
       
        # save results (point by point)
        results = np.array([mse_i, qlike_i]).transpose()
        np.savetxt('Data//' + ticker + '//' + ticker + '_LTSM.csv', results, 
                   delimiter=',', header='MSE, Q-LIKE', fmt='%10.5f', comments='')
        
        mse.append(np.mean(mse_i, axis=0))
        qlike.append(np.mean(qlike_i, axis=0))

    return mse, qlike

# Apply EVALUATE function to predictions
mse, qlike = evaluate(y_true, y_pred)

# save results
results = np.array([mse, qlike]).transpose()

df = pd.DataFrame({'MSE_LTSM': mse, 'QLIKE_LTSM': qlike})
print(df.describe())

df_Summary['MSE_LTSM'] = df['MSE_LTSM'] 
df_Summary['QLIKE_LTSM'] = df['QLIKE_LTSM'] 
df_Summary.to_csv(csv,encoding='utf-8', index=True, header = True)