In [None]:
import os
import pandas as pd
import numpy as np
import math
import datetime as dt
from datetime import datetime
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from itertools import cycle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

!pip install yfinance
import yfinance as yf

!pip install finta
from finta import TA

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Obtain stock data
# The download is indexed by Date; reset_index() turns Date into a regular
# column so it can be renamed and filtered like the other fields.
column_names = {'Date': 'date','Open':'open','High':'high','Low':'low','Close':'close',
                'Adj Close':'adj_close','Volume':'volume'}
maindf = yf.download('GOOG').reset_index().rename(columns=column_names)
maindf['date'] = pd.to_datetime(maindf['date'])

# Basic size report
n_days, n_fields = maindf.shape
print("Total number of days: ",n_days)
print("Total number of fields: ",n_fields)

# Missing-value report (isnull and isna are aliases in pandas; both are kept
# so the printed summary stays the same)
print("Null values:", maindf.isnull().values.sum())
print("NA values:", maindf.isna().values.any())

# Segmentation

def _first_difference(values):
    """Return values[i] - values[i-1] for every row, with 0 for the first row.

    Vectorised replacement for an element-by-element Python loop; the input
    dtype is preserved (integer volumes stay integer).
    """
    arr = np.asarray(values)
    # Prepending the first element makes the first difference exactly 0.
    return np.diff(arr, prepend=arr[:1])

# Feature #1: On Balance Volume
# just dVolume basically from the previous day (V{i} - V{i-1})
dV = pd.DataFrame({"dVolume": _first_difference(maindf["volume"])})

# F2/F3 setup
# finta reads the lower-case OHLC columns directly from the frame.
ohlc = maindf

# Feature #2: Exponential moving average (EMA)
emaAvg = TA.EMA(ohlc)

# Feature #3: Bollinger bands
bbs = TA.BBANDS(ohlc)

# Feature #4: dPrice (close{i} - close{i-1})
dPrice = pd.DataFrame({"dPrice": _first_difference(maindf["close"])})

# Mask
pattern = '%Y-%m-%d'

startDate = datetime.strptime("2016-08-01", pattern)
endDate = datetime.strptime("2020-09-01", pattern)

# The target for row i is the close of row i+1, so the very last downloaded
# row has no target. Excluding it from the mask avoids a KeyError on
# close[i+1] when the date window reaches the end of the data.
has_next_day = np.arange(len(maindf)) < len(maindf) - 1
mask = (maindf['date'] >= startDate) & (maindf['date'] <= endDate) & has_next_day

# Next day's prices that we want

# Create total y data vec: one 1-element array per selected row, holding the
# following row's close (same container layout the later cells index into).
next_close = maindf["close"].shift(-1)
totalY_data = [np.array([price]) for price in next_close[mask]]


# # Crop data
maindf = maindf[mask]
allDates = list(maindf['date'].copy())
emaAvg = emaAvg[mask]
dV = dV[mask]
dPrice = dPrice[mask]
bbs = bbs[mask]

# Re-number rows from 0 so later cells can index by position.
# Only emaAvg drops the old index; the other frames keep it as an extra
# 'index' column, exactly as before.
maindf = maindf.reset_index()
emaAvg = emaAvg.reset_index(drop=True)
dV = dV.reset_index()
bbs = bbs.reset_index()
dPrice = dPrice.reset_index()

## Plotting
# Chart 1: open / close / high / low over the cropped date window.
price_series = [maindf['open'], maindf['close'], maindf['high'], maindf['low']]
names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])

fig = px.line(
    maindf,
    x=maindf.date,
    y=price_series,
    labels={'date': 'Date [datetime]','value':'Stock value [USD]'},
)
fig.update_layout(
    title_text='Stock analysis chart',
    font_size=15,
    font_color='black',
    legend_title_text='Stock Parameters',
)
# Relabel each trace with the next display name from the cycle.
fig.for_each_trace(lambda trace: trace.update(name=next(names)))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()


# Chart 2: day-over-day change of the closing price.
names2 = cycle(['Stock Value Difference'])

fig2 = px.line(
    maindf,
    x=maindf.date,
    y=[dPrice["dPrice"]],
    labels={'date': 'Date [datetime]','value':'Stock Value Difference [USD]'},
)
fig2.update_layout(
    title_text='Stock analysis chart',
    font_size=15,
    font_color='black',
    legend_title_text='Stock Parameters',
)
fig2.for_each_trace(lambda trace: trace.update(name=next(names2)))
fig2.update_xaxes(showgrid=False)
fig2.update_yaxes(showgrid=False)

fig2.show()

[*********************100%***********************]  1 of 1 completed
Total number of days:  4616
Total number of fields:  7
Null values: 0
NA values: False


In [None]:
# Create total X data vec
# One feature vector per row of the cropped frame. The order of this list is
# the feature order seen by the scaler and the model.
feature_columns = [
    maindf["close"],
    dPrice["dPrice"],
    dV["dVolume"],
    emaAvg,
    bbs["BB_UPPER"],
    bbs["BB_MIDDLE"],
    bbs["BB_LOWER"],
]

totalX_data = []
for row in range(len(maindf)):
  # intermediateFeatureVec is left in scope on purpose: the model cell reads
  # its length to size the input layer.
  intermediateFeatureVec = [column[row] for column in feature_columns]
  totalX_data.append(np.transpose(intermediateFeatureVec))

# Total y data vec was made previously 


In [None]:
# Normalize input data, build sliding windows, split into train / test

history_points = 21 # about ~21 days of stock trade per month
trainTestSplitRatio = .7

# Window k covers rows [k, k + history_points); its target is the next-day
# close stored at row k + history_points - 1.
n_windows = len(totalX_data) - history_points
if n_windows < 1:
    raise ValueError("Not enough rows (%d) to build a window of %d history points"
                     % (len(totalX_data), history_points))

# The ratio is applied to the number of windows (what is actually split below),
# not to the raw row count.
splitLoc = int(round(trainTestSplitRatio * n_windows))

# Fit the scalers on the rows the training windows can see (rows 0 ..
# splitLoc + history_points - 2), so test-period min/max never leak into
# training. Test values may therefore fall outside [0, 1].
n_fit_rows = splitLoc + history_points - 1

xNormaliser = MinMaxScaler()
xNormaliser.fit(totalX_data[:n_fit_rows])
xData_normalised = xNormaliser.transform(totalX_data)

yNormaliser = MinMaxScaler()
yNormaliser.fit(totalY_data[:n_fit_rows])
yData_normalised = yNormaliser.transform(totalY_data)

# Use history to prepare data
X_total = np.array([xData_normalised[i : i + history_points].copy() for i in range(n_windows)])
y_total = np.array([yData_normalised[i + history_points - 1].copy() for i in range(n_windows)])
# Date of the last input row of each window
datesPredicted = np.array([allDates[i + history_points - 1] for i in range(n_windows)])

# Split into train/validation data and test data (chronological, no shuffling)
X_train = X_total[:splitLoc]
y_train = y_total[:splitLoc]
dates_train = datesPredicted[:splitLoc]

X_test = X_total[splitLoc:]
y_test = y_total[splitLoc:]
dates_test = datesPredicted[splitLoc:]

print("First 25 prices:")
# Guard against short date ranges with fewer than 25 rows
for i in range(min(25, len(totalY_data))):
    print("i = ",i,": $",totalY_data[i][0],sep="")

print("xTrain shape:\n",np.shape(X_train))
print("xTrain:\n",X_train)
print("\n")
print("y_train shape:\n",np.shape(y_train))
print("y_train:\n",y_train)
print("\n")

First 25 prices:
i = 0: $38.55350112915039
i = 1: $38.659000396728516
i = 2: $38.580501556396484
i = 3: $39.111000061035156
i = 4: $39.0880012512207
i = 5: $39.2130012512207
i = 6: $39.23400115966797
i = 7: $39.24250030517578
i = 8: $39.1609992980957
i = 9: $39.12200164794922
i = 10: $38.856998443603516
i = 11: $38.99549865722656
i = 12: $38.875
i = 13: $38.770999908447266
i = 14: $38.60749816894531
i = 15: $38.604000091552734
i = 16: $38.481998443603516
i = 17: $38.47050094604492
i = 18: $38.47700119018555
i = 19: $38.60749816894531
i = 20: $38.454498291015625
i = 21: $38.352500915527344
i = 22: $38.43899917602539
i = 23: $38.573001861572266
i = 24: $39.00400161743164
xTrain shape:
 (721, 21, 7)
xTrain:
 [[[0.03979971 0.54984443 0.33356242 ... 0.         0.         0.        ]
  [0.03784217 0.52658754 0.36448855 ... 0.00485127 0.00458608 0.00405299]
  [0.04012415 0.5420394  0.42206614 ... 0.00913219 0.00912937 0.00867805]
  ...
  [0.03604685 0.53281581 0.47044836 ... 0.01657964 0.0570

In [None]:
# Import keras stuff
import tensorflow as tf
import keras
from keras import optimizers
from keras.callbacks import History
from keras.models import Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate, Layer
import numpy as np
import keras.backend as K
# Seed the NumPy and TensorFlow global random generators.
TF_SEED, NP_SEED = 20, 10
np.random.seed(NP_SEED)
tf.random.set_seed(TF_SEED)

In [None]:
# Create & run model

# Add bahdanau attention layer to the deep learning network
class attention(Layer):
    """Attention pooling over axis 1 of the input.

    Assumes a 3-D input laid out as (batch, timesteps, features) - the weight
    shapes below are taken from input_shape[1] and input_shape[-1]. The layer
    scores every timestep, normalises the scores with a softmax, and returns
    the score-weighted sum of the input over axis 1.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable score weights: W (features, 1) and b (timesteps, 1)."""
        n_steps, n_features = input_shape[1], input_shape[-1]
        self.W = self.add_weight(name='attention_weight', shape=(n_features, 1),
                                 initializer='random_normal', trainable=True)
        self.b = self.add_weight(name='attention_bias', shape=(n_steps, 1),
                                 initializer='zeros', trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        # One tanh-squashed alignment score per timestep; drop the trailing size-1 axis
        scores = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # Softmax turns scores into weights, re-expanded so they broadcast over features
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        # Context vector: weighted sum across axis 1
        return K.sum(x * weights, axis=1)

# Network: LSTM -> dropout -> LSTM -> attention pooling -> dropout -> dense head.
#
# Both LSTMs return full sequences so every layer up to the attention layer
# sees a 3-D tensor. The attention layer sums over the time axis and produces
# a 2-D tensor, so it must come AFTER the last recurrent layer: feeding its
# output into an LSTM (as the previous wiring did) raises
# "ValueError: ... expected ndim=3, found ndim=2".
lstm_input = Input(shape=(history_points, len(intermediateFeatureVec)), name='lstm_input')
inputs = LSTM(21, name='first_layer', return_sequences = True)(lstm_input)
inputs = Dropout(0.5, name='first_dropout_layer')(inputs)
inputs = LSTM(64, name='lstm_1', return_sequences = True)(inputs)
inputs = attention(name='attention_pooling')(inputs)
inputs = Dropout(0.5, name='lstm_dropout_1')(inputs)
inputs = Dense(32, name='first_dense_layer')(inputs)
inputs = Dense(1, name='dense_layer')(inputs)
output = Activation('linear', name='output')(inputs)
model = Model(inputs=lstm_input, outputs=output)
adam = optimizers.Adam(learning_rate = 8e-4) #.0008 used for general patterns, 5e-6 used for seasonality
model.compile(optimizer=adam, loss='mse')
epochs=75

# Stop once validation loss has not improved for 10 epochs; keep the best
# model seen so far on disk.
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')

# validation_split holds out the last 10% of the training windows.
history = model.fit(x=X_train, y=y_train, batch_size=15, epochs=epochs, shuffle=True, validation_split = 0.1, callbacks=[earlyStopping, mcp_save])

ValueError: ignored

In [None]:
# Plot training & validation loss
# Per-epoch losses recorded by model.fit
history_log = history.history
loss = history_log['loss']
val_loss = history_log['val_loss']

# x-axis: one tick per recorded epoch
epochs = range(len(loss))

plt.figure()
for curve, fmt, label in ((loss, 'r', 'Training loss'), (val_loss, 'b', 'Validation loss')):
    plt.plot(epochs, curve, fmt, label=label)
plt.title('Training and validation loss')
plt.legend(loc=0)

plt.show()

In [None]:
# Show training/test predictions
train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

# Ground truth, skipping the first history_points rows
original_nonprocessed_y = [ele[0] for ele in totalY_data][history_points:]

# Same series after a normalise -> inverse-normalise round trip (sanity check of the scaler)
original_processed_y = [ele[0] for ele in yNormaliser.inverse_transform(yData_normalised)][history_points:]

# Model outputs mapped back to price units
predicted_train_y = [ele[0] for ele in yNormaliser.inverse_transform(train_predict)]
predicted_test_y = [ele[0] for ele in yNormaliser.inverse_transform(test_predict)]

# Pad each prediction series with NaN over the other split so both line up on
# one axis. Padding with NaN directly (instead of zeros that are later turned
# into NaN) means a genuine 0.0 prediction is never blanked out.
predictions_train = np.concatenate((predicted_train_y, np.full(len(predicted_test_y), np.nan)))
predictions_test = np.concatenate((np.full(len(predicted_train_y), np.nan), predicted_test_y))

# Shift the entire vector back by one since it is predicting a day in advance
predictions_train = np.append(predictions_train[1:],np.nan)
predictions_test = np.append(predictions_test[1:],np.nan)

# All four series must have the same length for the DataFrame below
print(len(original_nonprocessed_y))
print(len(original_processed_y))
print(len(predictions_train))
print(len(predictions_test))

new_pred_plot = pd.DataFrame({
    'originalUnprocessed':original_nonprocessed_y,
    'originalProcessed':original_processed_y,
    'predictionsTrain':predictions_train,
    'predictionsTest':predictions_test
})

fig3 = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['originalUnprocessed'], new_pred_plot['originalProcessed'], new_pred_plot['predictionsTrain'],new_pred_plot['predictionsTest']], labels={'value': 'Stock price','index': 'Timestamp'})


names3 = cycle(['Original close price (unprocessed)','Original close price (processed)','Train/Validation Predictions (model has access)','Test Predictions (model has no access)'])
fig3.for_each_trace(lambda t:  t.update(name = next(names3)))
fig3.update_xaxes(showgrid=False)
fig3.update_yaxes(showgrid=False)
fig3.show()


In [None]:
# Predictions: selling/buying anything a difference is predicted
# Strategy: each step compares the model's prediction with the last known real
# price. Predicted rise -> spend all available cash on whole shares; predicted
# fall or no change -> sell the whole position.
bal = 100000.0
shares = 0

yhat = np.array(predictions_test.copy())
y = np.array(original_processed_y.copy())

print("Start balance:",bal)
print(len(y))
print(len(datesPredicted))
print(len(dates_test))
print(len(predictions_test))
predictionsLessThanLastPrice = 0

indicesBuy = []
indicesSell = []

# Limit to prediction range only
# NOTE(review): dates_test holds the last input day of each window while y
# holds later closes, so the x-axis dates appear to lag the plotted prices by
# a trading day or two - confirm before reading exact dates off the chart.
mask = (datesPredicted >= dates_test[0]) & (datesPredicted <= dates_test[-1])
print(mask)
yhat = yhat[mask]
y = y[mask]

fig4 = plt.figure()
plt.grid()
plt.xlabel("Timestamp")
plt.ylabel("Stock Price")
plt.plot(dates_test,y,'b-') # masked portion would be dates_test, start date would be dates_test[0]
plt.title("Test Data - Simulated Trading")

for i in range(1,len(y)):
    curDayPrice = y[i-1]

    new = yhat[i] # prediction for the next day

    old = curDayPrice # last real price that we have access to
    if new < old:
        predictionsLessThanLastPrice += 1
    
    # NaN predictions (the padded tail) satisfy neither branch below, so no trade happens
    percentChangePredicted = (new - old)/old * 100

    if percentChangePredicted <= 0: # sell
        # Sell my entire portfolio
        if shares > 0:
          indicesSell.append([dates_test[i],curDayPrice])

        bal += float(shares) * curDayPrice
        shares = 0

    elif percentChangePredicted > 0: # buy
        # Buy as many whole shares as the remaining cash allows and ADD them
        # to the current position. Assigning `shares = bal // price` here
        # would throw away shares bought on earlier consecutive buy signals.
        newShares = bal // curDayPrice

        if newShares > 0:
          shares += newShares
          bal -= float(newShares) * curDayPrice
          indicesBuy.append([dates_test[i],curDayPrice])

plt.plot([ele[0] for ele in indicesBuy],[ele[1] for ele in indicesBuy],'g+')
plt.plot([ele[0] for ele in indicesSell],[ele[1] for ele in indicesSell],'r*')
plt.legend(["Real Prices","Simualted Share Buys","Simulated Share Sells"])
    
plt.show()
    

print("Percent predictions less than the last: ",(float(predictionsLessThanLastPrice)/len(y)*100),'%',sep='')
print("Ending balance: ",bal)
print("Ending shares: ",shares)
# Remaining shares are valued at the last price used inside the loop
print("Ending net worth:",curDayPrice * float(shares) + bal)
