# Quantiative Algorithm using Technical Indicators

Technical data obtained from TradingView

ToDo:

Add LSTM Layers

Fit & Train Model

Test Model

Optimize

## The Data

In [2]:
import pandas as pd
import numpy as np

In [3]:
stock_data = pd.read_csv('SPY Original.csv')
stock_data = stock_data.drop('timestamp', axis=1)
stock_data = stock_data.replace(to_replace='None', value=np.nan).dropna()
stock_data = stock_data.astype(float)

In [4]:
stock_data.head()

Unnamed: 0,%,open,high,low,close,vol,vol_20d_MA,BB_1,BB_2,BB_3,...,OBV,ADX,MACD_1,MACD_2,MACD_3,Stoch_RSI_1,Stoch_RSI_2,Klinger_Oscillator_1,Klinger_Oscillator_2,CMF
66,0.28,44.46875,44.75,44.46875,44.59375,109000.0,127250.0,44.3625,45.287289,43.437711,...,542400.0,24.91018,0.048263,-0.130486,-0.178749,91.984182,77.648061,7007.894307,-2410.744487,0.050708
67,-0.35,44.53125,44.5625,44.40625,44.4375,54700.0,128585.0,44.367188,45.292499,43.441876,...,487700.0,24.337024,0.05773,-0.106587,-0.164317,93.750468,87.75418,5122.105202,-1334.623103,0.026531
68,-0.21,44.46875,44.46875,44.28125,44.34375,68000.0,122945.0,44.370312,45.294864,43.445761,...,419700.0,24.095542,0.056152,-0.094126,-0.150279,89.118202,91.617617,3112.123261,-699.373622,0.029034
69,0.21,44.40625,44.6875,44.40625,44.4375,113900.0,111215.0,44.346875,45.239094,43.454656,...,533600.0,23.14129,0.059573,-0.075813,-0.135385,85.022031,89.2969,5176.186367,139.99209,-0.129592
70,0.42,44.4375,44.625,44.3125,44.625,42600.0,106040.0,44.328125,45.17948,43.47677,...,576200.0,22.494517,0.071794,-0.045643,-0.117437,89.223491,87.787908,5514.378055,907.761514,-0.11583


In [5]:
stock_data.shape

(6520, 26)

In [6]:
stock_data = stock_data.dropna()

In [7]:
stock_data.shape

(6520, 26)

In [8]:
# Split by time series, applying stratified sampling is wrong
Train = stock_data.iloc[:5500,:]
Valid = stock_data.iloc[5501:6000,:]
Test = stock_data.iloc[6001:,:]

In [9]:
# Selecting the input as all features till the day befor last
X_train = Train.iloc[:-1,1:]
X_valid = Valid.iloc[:-1,1:]
X_test = Test.iloc[:-1,1:]

In [10]:
# Specifying the output as the next day
y_train = Train.iloc[1:,0].values
y_test = Test.iloc[1:,0].values
y_valid = Valid.iloc[1:,0].values

In [11]:
columns = X_train.columns
train_ind = X_train.index
test_ind = X_test.index
valid_ind = X_valid.index

### Scale the Data

In [12]:
from sklearn.preprocessing import MinMaxScaler
# Scaling the input
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
# appling the scaling trained on the train data
X_test = scaler.transform(X_test)
X_valid = scaler.transform(X_valid)

In [13]:
# Scaling the output as it has negative using another scaler
scaler2 = MinMaxScaler()
y_train = scaler2.fit_transform(y_train.reshape((-1,1)))
y_test = scaler2.transform(y_test.reshape((-1,1)))
y_valid = scaler2.transform(y_valid.reshape((-1,1)))

In [14]:
X_train = pd.DataFrame(data=X_train,columns = columns,index=train_ind)
X_test = pd.DataFrame(data=X_test,columns = columns,index=test_ind)
X_valid = pd.DataFrame(data=X_valid,columns = columns,index=valid_ind)

In [15]:
X_train = X_train.reset_index(drop=True)

## Keras Modeling

In [23]:
from keras.layers import concatenate,Dropout,Flatten, LSTM,BatchNormalization, Reshape,GlobalAveragePooling1D, Input, Dense, Conv2D, GlobalAveragePooling1D, Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.models import Model
from keras.utils import plot_model, np_utils
from keras.optimizers import Adam

In [24]:
#Global Setup
time_period = len(X_train)

In [25]:
#Note: Output Shape is (Input Height - Filter Height + 1 (if strides=1))

#Trend Indicators

#MACD Setup
macd_df = pd.concat([X_train['MACD_1'], X_train['MACD_2'], X_train['MACD_3']], axis=1)
macd_outputs = len(macd_df.columns)
#Ichimoku Setup
ichimoku_df = pd.concat([X_train['Ichimoku _1'], X_train['Ichimoku_2'], X_train['Ichimoku_3'], X_train['Ichimoku_4'], X_train['Ichimoku_5']], axis=1)
ichimoku_outputs = len(ichimoku_df.columns)
#SAR Setup
sar_df = pd.concat([X_train['SAR']], axis=1)
sar_outputs = len(sar_df.columns)

#Volatility Indicators

#Bollinger Bands
bollinger_df = pd.concat([X_train['BB_1'], X_train['BB_2'], X_train['BB_3']], axis=1)
bollinger_outputs = len(bollinger_df.columns)

#Momentum Indicators

#stochRSI Setup
stochRSI_df = pd.concat([X_train['Stoch_RSI_1'], X_train['Stoch_RSI_2']], axis=1)
stochRSI_outputs = len(stochRSI_df.columns)
#adx Setup
adx_df = pd.concat([X_train['ADX']], axis=1)
adx_outputs = len(adx_df.columns)

#Volume Indicators

#KlingerVol Setup
klingerVol_df = pd.concat([X_train['Klinger_Oscillator_1'], X_train['Klinger_Oscillator_2']], axis=1)
klingerVol_outputs = len(klingerVol_df.columns)
#OBV Setup
obv_df = pd.concat([X_train['OBV']], axis=1)
obv_outputs = len(obv_df.columns)
#CMF Setup
cmf_df = pd.concat([X_train['CMF']], axis=1)
cmf_outputs = len(cmf_df.columns)

In [26]:
def extract_input_features(X_train):
    #MACD Setup
    macd_df = pd.concat([X_train['MACD_1'], X_train['MACD_2'], X_train['MACD_3']], axis=1)
    macd_outputs = len(macd_df.columns)
    #Ichimoku Setup
    ichimoku_df = pd.concat([X_train['Ichimoku _1'], X_train['Ichimoku_2'], X_train['Ichimoku_3'], X_train['Ichimoku_4'], X_train['Ichimoku_5']], axis=1)
    ichimoku_outputs = len(ichimoku_df.columns)
    #SAR Setup
    sar_df = pd.concat([X_train['SAR']], axis=1)
    sar_outputs = len(sar_df.columns)

    #Volatility Indicators

    #Bollinger Bands
    bollinger_df = pd.concat([X_train['BB_1'], X_train['BB_2'], X_train['BB_3']], axis=1)
    bollinger_outputs = len(bollinger_df.columns)

    #Momentum Indicators

    #stochRSI Setup
    stochRSI_df = pd.concat([X_train['Stoch_RSI_1'], X_train['Stoch_RSI_2']], axis=1)
    stochRSI_outputs = len(stochRSI_df.columns)
    #adx Setup
    adx_df = pd.concat([X_train['ADX']], axis=1)
    adx_outputs = len(adx_df.columns)

    #Volume Indicators

    #KlingerVol Setup
    klingerVol_df = pd.concat([X_train['Klinger_Oscillator_1'], X_train['Klinger_Oscillator_2']], axis=1)
    klingerVol_outputs = len(klingerVol_df.columns)
    #OBV Setup
    obv_df = pd.concat([X_train['OBV']], axis=1)
    obv_outputs = len(obv_df.columns)
    #CMF Setup
    cmf_df = pd.concat([X_train['CMF']], axis=1)
    cmf_outputs = len(cmf_df.columns)
    X_input_list = [macd_df.values, ichimoku_df.values, sar_df.values, bollinger_df.values, stochRSI_df.values, 
                    adx_df.values, klingerVol_df.values, obv_df.values, cmf_df.values]
    return X_input_list

In [27]:
## Main edits on teh architecture
#1 adda dense layer after each input
#2 reshaping it to 2d tensor
#3 adding batch normalization after the final concatenation to reduce overfitting
#4 addding the LSTM
# the final layer must be 1
# loss can't be cross_entropy as it's for classification

In [28]:
#MACD
#Conv1D Shape [# Filters, Filter Height]
visible_macd = Input(shape=(macd_outputs,))
x = Dense(64)(visible_macd)
x = Reshape((64,1))(x)
conv_macd = Conv1D(32, 5, name='3MACD_Conv', activation='relu')(x)
pool_macd = MaxPooling1D(3)(conv_macd)
flat_macd = Flatten()(pool_macd)

#Ichimoku
#Conv1D Shape [# Filters, Filter Height]
visible_ichimoku = Input(shape=(ichimoku_outputs,))
x = Dense(64)(visible_ichimoku)
x = Reshape((64,1))(x)
conv_ichimoku = Conv1D(32, 5, name='5Ichimoku_Conv', activation='relu')(x)
pool_ichimoku = MaxPooling1D(3)(conv_ichimoku)
flat_ichimoku = Flatten()(pool_ichimoku)

#SAR
#Conv1D Shape [# Filters, Filter Height]
visible_sar = Input(shape=(sar_outputs,))
x = Dense(64)(visible_sar)
x = Reshape((64,1))(x)
conv_sar = Conv1D(32, 5, activation='relu')(x)

pool_sar = MaxPooling1D(3)(conv_sar)
flat_sar = Flatten()(pool_sar)

#All Trend Indicators
merge_trend = concatenate([flat_macd, flat_ichimoku, flat_sar])
dense_trend = Dense(100, activation='relu')(merge_trend)

In [29]:
#Conv1D Shape [# Filters, Filter Height]
visible_bollinger = Input(shape=(bollinger_outputs,))
x = Dense(64)(visible_bollinger)
x = Reshape((64,1))(x)
conv_bollinger = Conv1D(32, 5, name='3Bollinger_Conv', activation='relu')(x)
pool_bollinger = MaxPooling1D(3)(conv_bollinger)
flat_bollinger = GlobalAveragePooling1D()(pool_bollinger)

#All Volatility Indicators
# merge_volatility = concatenate([flat_bollinger])
dense_volatility = Dense(100, activation='relu')(flat_bollinger)

In [30]:
#stochRSI
#Conv1D Shape [# Filters, Filter Height]
visible_stochRSI = Input(shape=(stochRSI_outputs,))
x = Dense(64)(visible_stochRSI)
x = Reshape((64,1))(x)
conv_stochRSI = Conv1D(32, 5, name='2StochRSI_Conv', activation='relu')(x)
pool_stochRSI = MaxPooling1D(3)(conv_stochRSI)
flat_stochRSI = GlobalAveragePooling1D()(pool_stochRSI)

#adx
#Conv1D Shape [# Filters, Filter Height]
visible_adx = Input(shape=(adx_outputs,))
x = Dense(64)(visible_adx)
x = Reshape((64,1))(x)
conv_adx = Conv1D(32, 5, activation='relu')(x)
pool_adx = MaxPooling1D(3)(conv_adx)
flat_adx = GlobalAveragePooling1D()(pool_adx)

#All Momentum Indicators
merge_momentum = concatenate([flat_stochRSI, flat_adx])
dense_momentum = Dense(100, activation='relu')(merge_momentum)

In [31]:
#KlingerVol
#Conv1D Shape [# Filters, Filter Height]
visible_klingerVol = Input(shape=(klingerVol_outputs,))
x = Dense(64)(visible_klingerVol)
x = Reshape((64,1))(x)
conv_klingerVol = Conv1D(32, 5, activation='relu')(x)
pool_klingerVol = MaxPooling1D(3)(conv_klingerVol)
flat_klingerVol = GlobalAveragePooling1D()(pool_klingerVol)

#OBV
#Conv1D Shape [# Filters, Filter Height]
visible_obv = Input(shape=(obv_outputs,))
x = Dense(64)(visible_obv)
x = Reshape((64,1))(x)
conv_obv = Conv1D(32, 5, activation='relu')(x)
pool_obv = MaxPooling1D(3)(conv_obv)
flat_obv = GlobalAveragePooling1D()(pool_obv)

#CMF
#Conv1D Shape [# Filters, Filter Height]
visible_cmf = Input(shape=(cmf_outputs,))
x = Dense(64)(visible_cmf)
x = Reshape((64,1))(x)
conv_cmf = Conv1D(32, 5, activation='relu')(x)
pool_cmf = MaxPooling1D(3)(conv_cmf)
flat_cmf = GlobalAveragePooling1D()(pool_cmf)

#All Volume Indicators
merge_volume = concatenate([flat_klingerVol, flat_obv, flat_cmf])
dense_volume = Dense(100, activation='relu')(merge_volume)

In [37]:
#All Indicators
merge_all = concatenate([dense_trend, dense_volatility, dense_momentum, dense_volume])
#Final Output
output = BatchNormalization()(merge_all)
output = Dense(32, activation='relu')(output)
output = Reshape((16,2))(output)
output = LSTM(16, activation='relu')(output)
output = Dense(1)(output)

#Create Model
visibles_list = [visible_macd, visible_ichimoku, visible_sar, visible_bollinger, visible_stochRSI, visible_adx, visible_klingerVol, visible_obv, visible_cmf]
model = Model(inputs=visibles_list, outputs=output)
#Print Model Text Summary
print(model.summary())
#Save Model Graph
plot_model(model, to_file='./model.png', show_shapes=True)

#Model Creation & Usage

model.compile(loss=['mse'], optimizer='adam',
              metrics=['mse'])

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 3)            0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 5)            0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, 2)            0                                            
__________________________________________________________________________________________________
input_7 (I

In [33]:
X_input_list = [macd_df.values, ichimoku_df.values, sar_df.values, bollinger_df.values,
                stochRSI_df.values, adx_df.values, klingerVol_df.values, obv_df.values, cmf_df.values]

In [34]:
# Extract the featues as a list of arrays for validation and test data
X_valid_list = extract_input_features(X_valid)
X_test_list = extract_input_features(X_test)

In [38]:
from sklearn.metrics import mean_squared_error
from keras.callbacks import  ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Saving the model
checkpoint = ModelCheckpoint(filepath='model.hdfs',
                             save_weights_only=False,
                             monitor='val_loss',save_best_only=True)
# Stopping the model if her is no upfdte in the val loss
earlyStop = EarlyStopping(monitor='val_loss', min_delta=0, patience=15, verbose=0, mode='auto')

# REduce learning rate
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8)

model.fit(x=X_input_list, y=y_train, epochs=100, batch_size=16, verbose=2,validation_data=[X_valid_list,y_valid])

Train on 5499 samples, validate on 498 samples
Epoch 1/100
 - 9s - loss: 0.0163 - mean_squared_error: 0.0163 - val_loss: 0.0018 - val_mean_squared_error: 0.0018
Epoch 2/100
 - 7s - loss: 0.0026 - mean_squared_error: 0.0026 - val_loss: 0.0014 - val_mean_squared_error: 0.0014
Epoch 3/100
 - 7s - loss: 0.0026 - mean_squared_error: 0.0026 - val_loss: 0.0032 - val_mean_squared_error: 0.0032
Epoch 4/100
 - 7s - loss: 0.0025 - mean_squared_error: 0.0025 - val_loss: 0.0015 - val_mean_squared_error: 0.0015
Epoch 5/100
 - 7s - loss: 0.0025 - mean_squared_error: 0.0025 - val_loss: 0.0016 - val_mean_squared_error: 0.0016
Epoch 6/100
 - 7s - loss: 0.0025 - mean_squared_error: 0.0025 - val_loss: 0.0013 - val_mean_squared_error: 0.0013
Epoch 7/100
 - 9s - loss: 0.0025 - mean_squared_error: 0.0025 - val_loss: 0.0014 - val_mean_squared_error: 0.0014
Epoch 8/100
 - 7s - loss: 0.0025 - mean_squared_error: 0.0025 - val_loss: 0.0013 - val_mean_squared_error: 0.0013
Epoch 9/100
 - 7s - loss: 0.0025 - mean_s

Epoch 72/100
 - 7s - loss: 0.0024 - mean_squared_error: 0.0024 - val_loss: 0.0014 - val_mean_squared_error: 0.0014
Epoch 73/100
 - 7s - loss: 0.0024 - mean_squared_error: 0.0024 - val_loss: 0.0014 - val_mean_squared_error: 0.0014
Epoch 74/100
 - 7s - loss: 0.0024 - mean_squared_error: 0.0024 - val_loss: 0.0013 - val_mean_squared_error: 0.0013
Epoch 75/100
 - 7s - loss: 0.0024 - mean_squared_error: 0.0024 - val_loss: 0.0013 - val_mean_squared_error: 0.0013
Epoch 76/100
 - 7s - loss: 0.0024 - mean_squared_error: 0.0024 - val_loss: 0.0014 - val_mean_squared_error: 0.0014
Epoch 77/100
 - 7s - loss: 0.0024 - mean_squared_error: 0.0024 - val_loss: 0.0013 - val_mean_squared_error: 0.0013
Epoch 78/100
 - 7s - loss: 0.0024 - mean_squared_error: 0.0024 - val_loss: 0.0014 - val_mean_squared_error: 0.0014
Epoch 79/100
 - 7s - loss: 0.0024 - mean_squared_error: 0.0024 - val_loss: 0.0013 - val_mean_squared_error: 0.0013
Epoch 80/100
 - 7s - loss: 0.0024 - mean_squared_error: 0.0024 - val_loss: 0.001

<keras.callbacks.History at 0x7fc0dc045160>

In [40]:
pred = model.predict(X_test_list)
test_mse = mean_squared_error(y_test,pred)
print('Test MSE is : ' + str(test_mse))

Test MSE is : 0.0012101602286106344


In [41]:
actual_pred = scaler2.inverse_transform(pred)
actual_y = scaler2.inverse_transform(y_test)

In [44]:
pred_df = pd.DataFrame()
pred_df['Y'] = np.concatenate(actual_y)
pred_df['pred'] = np.concatenate(actual_pred)

In [46]:
pred_df.head()

Unnamed: 0,Y,pred
0,-0.3,0.24936
1,-0.3,0.265385
2,-0.19,0.228873
3,0.13,0.129518
4,0.35,0.124298
