In [1]:
# Import libraries
import os, glob
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, SimpleRNN
from keras import backend as K
from keras import regularizers
# Set GPU device
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]=""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

Using TensorFlow backend.


In [2]:
def qlike(y_true, y_pred):
    return K.mean((K.log(y_pred) + (y_true / y_pred)), axis=-1)

In [3]:
#Summary Table
csv = (glob.glob('*.csv'))[0]
print ('Filename for Summary Table: ' + csv)

#Data Preparation from CSV  
df_Summary = pd.read_csv(csv,index_col=0,engine = 'python')
n_days = 252 #Number of days to use as dataset based on average trading days per year 

#Define empty array
dataset = np.empty((n_days, 0)) # Preallocate the array

#Create Dataset
for i in range(0, len(df_Summary)):
    try:
        ticker = df_Summary['Ticker'][i]
        df_ticker = pd.read_csv('Data//'+ ticker + '//' + ticker + '.csv',engine = 'python')

        #Create new dataset array based on 'Return_log' and 'Realized_Volatility_20D'
        asset=df_ticker[['Ret_Log','R_Vol_20D']].tail(n_days).values
        dataset = np.append(dataset, asset, axis=1)
    
    except:
        continue

Filename for Summary Table: Summary_DJIA30.csv


In [4]:
# Normalize data
factor = 2

# Calculate second raw moment
M2 = np.mean(dataset ** 2, axis=0) ** (1/2)

# Apply scaling
dataset_norm = (1/factor) * (dataset / M2)

In [5]:
def create_dataset(dataset, look_back=1):
    """
    Function to convert series from dataset to supervised learning
    """
    data_x, data_y = [], []

    for i in range(len(dataset) - look_back):

        # Create sequence of length equal to look_back
        x = dataset[i:(i + look_back), :]
        data_x.append(x)

        # Take just the volatility for the target
        data_y.append(dataset[i + look_back, 1::2])

    return np.array(data_x), np.array(data_y)

In [6]:
# Convert series to supervised learning problem
look_back = 20
X, y = create_dataset(dataset_norm, look_back)

# Declare variables
n_features = dataset.shape[1]
n_assets = y.shape[1]

In [7]:
# Split dataset
training_days = 90
X_train, X_test = X[:training_days], X[training_days:]
y_train, y_test = y[:training_days], y[training_days:]

# Prepare the 3D input vector for the LSTM
X_train = np.reshape(X_train, (-1, look_back, n_features))
X_test = np.reshape(X_test, (-1, look_back, n_features))

ValueError: cannot reshape array of size 0 into shape (20,0)

In [None]:
batch_size = 1

# Create the model
model = Sequential()
model.add(SimpleRNN(len(dataset[0]),
               input_shape=(look_back, n_features),
               batch_size=batch_size,
               return_sequences=True,
               stateful=True,
               activity_regularizer=regularizers.l1_l2(),
               recurrent_regularizer=regularizers.l1_l2()))
model.add(Dropout(0.2))
model.add(SimpleRNN(int(len(dataset[0])/2),
               return_sequences=False,
               stateful=True,
               activity_regularizer=regularizers.l1_l2(),
               recurrent_regularizer=regularizers.l1_l2()))
model.add(Dense(n_assets, activation='softplus'))          

In [None]:
# Compile the LSTM model
model.compile(loss=qlike, optimizer='rmsprop')

In [None]:
## Training and evaluating the model (On-line learning)

# Create empty arrays
y_pred = np.empty((0, n_assets))
y_true = np.empty((0, n_assets))

for j in range(training_days - look_back + 1, X.shape[0]):

    if j == (training_days - look_back + 1):

        # First training days for training
        X_train = X[:j]
        y_train = y[:j]

        # Next day for forecasting
        X_test = X[j].reshape(1, look_back, n_features)

        # Ensure the correct shape for LSTM
        X_test = np.tile(X_test, (batch_size, 1, 1))
        y_test = np.tile(y[j], (batch_size, 1))

        # Training epochs
        epochs = 90
    
    else:

        # Available data to refine network state
        X_train = X_test
        y_train = y_test

        # Ensure the correct shape for LSTM
        X_test = X[j].reshape(1, look_back, n_features)
        X_test = np.tile(X_test, (batch_size, 1, 1))
        y_test = np.tile(y[j], (batch_size, 1))

        # Epochs for updating
        epochs = 20
        
    # Fit the model
    for i in range(epochs):
        model.fit(X_train,
                  y_train,
                  epochs=1,
                  batch_size=batch_size,
                  verbose=0,
                  shuffle=False)
        model.reset_states()
    
    # Evaluate the model
    # Make predictions
    predicted_output = model.predict(X_test, batch_size=batch_size)

    predicted_output = predicted_output[0].reshape(1, n_assets)
    true_output = y_test[0].reshape(1, n_assets)

    # Save current prediction into an array
    y_pred = np.append(y_pred, predicted_output, axis=0)
    y_true = np.append(y_true, true_output, axis=0)

In [None]:
# Invert scaling
def invert_standardization(data, M2, factor):
  
    # Consider just volatility series
    M2 = M2[1::2]

    data = factor * data * M2

    return data

# Apply inversion
y_pred = invert_standardization(y_pred, M2, factor)
y_true = invert_standardization(y_true, M2, factor)

In [None]:
def evaluate(y_true, y_pred):
    """
    Function to calculate MSE and QLIKE
    """

    mse = []
    qlike = []

    for i in range(0, int(len(dataset[0])/2)):
        ticker = df_Summary['Ticker'][i]
        mse_i = (y_true[:, i] - y_pred[:, i]) ** 2
        qlike_i = np.log(y_pred[:, i]) + (y_true[:, i] /  y_pred[:, i])

        # save results (point by point)
        results = np.array([mse_i, qlike_i]).transpose()
        np.savetxt('Data//' + ticker + '//' + ticker + '_RNN.csv', results, 
                   delimiter=',', header='MSE, Q-LIKE', fmt='%10.5f', comments='')
        mse.append(np.mean(mse_i, axis=0))
        qlike.append(np.mean(qlike_i, axis=0))

    return mse, qlike

In [None]:
# Apply EVALUATE function to predictions
mse, qlike = evaluate(y_true, y_pred)

# save results
results = np.array([mse, qlike]).transpose()


df = pd.DataFrame({'MSE_RNN': mse, 'QLIKE_RNN': qlike})
print(df.describe())

df_Summary['MSE_RNN'] = df['MSE_RNN'] 
df_Summary['QLIKE_RNN'] = df['QLIKE_RNN'] 
df_Summary.to_csv(csv,encoding='utf-8', index=True, header = True)