# Deep Learning Stock Movement

By Patrick Ma <fivetwentysix@gmail.com>

In [1]:
import time
import pandas as pd
import talib
import numpy as np
import keras
import dotenv
import os
from keras import models, layers

# Load variables from the local `.env` file so that prepare_data_for_stock can
# read AMERITRADE_API_KEY from os.environ instead of hard-coding the key here.
dotenv.load_dotenv('.env')

def prepare_data_for_stock(symbol, period_type='day', period='10', frequency_type='minute', frequency='5', start_date=None):
  """Download price history for ``symbol`` and derive model features and targets.

  Candles are requested from the TD Ameritrade ``pricehistory`` endpoint using
  the key stored in the ``AMERITRADE_API_KEY`` environment variable. Bollinger
  Bands, MACD and next-candle OHLC columns are then added to the candle frame.

  Args:
    symbol: Ticker symbol inserted into the request path.
    period_type: Sent unchanged as the ``periodType`` query parameter.
    period: Sent unchanged as the ``period`` query parameter.
    frequency_type: Sent unchanged as the ``frequencyType`` query parameter.
    frequency: Sent unchanged as the ``frequency`` query parameter.
    start_date: Optional value for the ``startDate`` query parameter; left out
      of the request when falsy. (Presumably epoch milliseconds, like the
      ``datetime`` column this notebook later converts - confirm against the
      API documentation.)

  Returns:
    A 5-tuple ``(candles, candles_no_na, features, targets, feature_names)``:
      * ``candles``: every downloaded candle plus the indicator columns and
        the ``+1_open``/``+1_high``/``+1_low``/``+1_close`` columns.
      * ``candles_no_na``: ``candles`` without any row containing a NaN
        (indicator warm-up rows and the last row, which has no next candle).
      * ``features``: the ``feature_names`` columns of ``candles_no_na``.
      * ``targets``: the next-candle columns of ``candles_no_na`` in the order
        ``+1_open, +1_low, +1_high, +1_close``.
      * ``feature_names``: list of the feature column names.
  """
  api_key = os.environ.get('AMERITRADE_API_KEY')
  endpoint = (
    f'https://api.tdameritrade.com/v1/marketdata/{symbol}/pricehistory'
    f'?apikey={api_key}&periodType={period_type}&period={period}'
    f'&frequencyType={frequency_type}&frequency={frequency}'
  )
  if start_date:
    # Must be an in-place append: a bare ``endpoint + ...`` expression builds
    # the longer URL and throws it away, so start_date would never be sent.
    endpoint += f'&startDate={start_date}'

  data = pd.read_json(endpoint)
  # Each entry of the ``candles`` column is one candle record (a dict).
  candles = pd.DataFrame.from_records(data['candles'].tolist())

  # add bollinger bands
  upperband, middleband, lowerband = talib.BBANDS(candles['close'], timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)
  candles['upperband'] = upperband
  candles['middleband'] = middleband
  candles['lowerband'] = lowerband

  # Next-candle prices: shift(-1) moves row i+1's value onto row i, so each row
  # carries the values the model should predict. The last row gets NaN.
  for x in ('open', 'high', 'low', 'close'):
    candles[f'+1_{x}'] = candles[x].shift(-1)

  # add macd
  macd, macdsignal, macdhist = talib.MACD(candles['close'], fastperiod=12, slowperiod=26, signalperiod=9)
  candles['macd'] = macd
  candles['macdsignal'] = macdsignal
  candles['macdhist'] = macdhist

  # Data without NaNs
  candles_no_na = candles.dropna()
  feature_names = ['open', 'low', 'close', 'high', 'volume', 'upperband', 'middleband', 'lowerband', 'macd', 'macdsignal', 'macdhist']
  features = candles_no_na[feature_names]
  targets = candles_no_na[['+1_open', '+1_low', '+1_high', '+1_close']]

  return (candles, candles_no_na, features, targets, feature_names)

In [2]:
def build_model(features, targets, hidden_layers=5, epochs=10, units=10):
    """Fit a small fully-connected regression network mapping features to targets.

    A ``Normalization`` layer is adapted to ``features`` and applied *outside*
    the model, so anything later passed to ``model.predict`` must first go
    through the returned ``normalizer``.

    Args:
        features: DataFrame of input columns, one row per sample.
        targets: DataFrame of regression targets, row-aligned with ``features``.
        hidden_layers: Number of ReLU layers stacked after the first one; the
            network therefore has ``hidden_layers + 1`` ReLU layers in total.
        epochs: Number of training epochs.
        units: Width of every ReLU layer (defaults to the original fixed 10).

    Returns:
        ``(model, normalizer)``: the trained ``keras.Model`` and the adapted
        normalization layer.
    """
    # Arrays are the documented input type for Normalization.adapt().
    # NOTE(review): DataFrame handling is believed to vary across Keras
    # releases, so convert once up front.
    feature_values = features.to_numpy(dtype="float32")
    target_values = targets.to_numpy(dtype="float32")

    normalizer = layers.Normalization()
    normalizer.adapt(feature_values)
    normed = normalizer(feature_values)

    inputs = keras.Input(shape=(feature_values.shape[1],))
    x = inputs
    for _ in range(hidden_layers + 1):
        x = layers.Dense(units, activation="relu")(x)
    # Linear output layer: one unit per target column.
    outputs = layers.Dense(target_values.shape[1], activation=None)(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="mse",
        optimizer="adam",
        # 'accuracy' was dropped: it is a classification metric and carries no
        # meaning for continuous price targets.
        metrics=["MeanAbsoluteError"],
    )
    model.fit(normed, target_values, validation_split=0.1, epochs=epochs, verbose=1)
    return (model, normalizer)

In [3]:
# Fetch TSLA daily candles over a 20-year period; the request needs a valid
# AMERITRADE_API_KEY in the environment.
candles, candles_no_na, features, targets, feature_names = prepare_data_for_stock(
    'TSLA',
    frequency_type='daily',
    frequency='1',
    period_type='year',
    period='20',
)

HTTPError: HTTP Error 500: InvalidApiKey

In [None]:
# Train on the NaN-free rows only; keep the normalizer for prediction time.
model, normalizer = build_model(
    features,
    targets,
    hidden_layers=5,
    epochs=50,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Score every candle (not just the NaN-free training rows) so the output has
# one row per row of `candles`; rows with NaN indicators yield NaN predictions.
predictions = model.predict(
    normalizer(candles[feature_names])
)



In [None]:
# NOTE(review): `datetime` is not used in this cell; the import is kept in case
# other cells rely on it.
from datetime import datetime

# Label the four network outputs in the same order as the training targets
# (+1_open, +1_low, +1_high, +1_close).
predictions = pd.DataFrame(predictions, columns=['Open + 1 Prediction','Low + 1 Prediction','High + 1 Prediction','Close + 1 Prediction'])

# join() aligns on the index; predictions were made for every row of `candles`,
# so row i of `predictions` belongs to candle i.
res = candles[['datetime'] + feature_names].join(predictions)

# Convert the epoch-millisecond column in one vectorized call instead of
# calling pd.to_datetime once per row through .apply().
res['datetime'] = pd.to_datetime(res['datetime'], unit='ms')

In [None]:
# Show candles, indicators and next-candle predictions side by side.
display(res)

Unnamed: 0,datetime,open,low,close,high,volume,upperband,middleband,lowerband,macd,macdsignal,macdhist,Open + 1 Prediction,Low + 1 Prediction,High + 1 Prediction,Close + 1 Prediction
0,2010-06-29 05:00:00,1.266667,1.169333,1.592667,1.666667,281749140,,,,,,,,,,
1,2010-06-30 05:00:00,1.719333,1.553333,1.588667,2.027947,257915907,,,,,,,,,,
2,2010-07-01 05:00:00,1.666667,1.351333,1.464000,1.728000,123447942,,,,,,,,,,
3,2010-07-02 05:00:00,1.533333,1.247333,1.280000,1.540000,77127102,,,,,,,,,,
4,2010-07-06 05:00:00,1.333333,1.055333,1.074000,1.333333,103189437,1.797346,1.399867,1.002387,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3127,2022-11-29 06:00:00,184.990000,178.750000,180.830000,186.380000,83357111,190.119552,179.944000,169.768448,-12.361890,-13.628691,1.266800,171.281433,167.135437,174.388367,170.818069
3128,2022-11-30 06:00:00,182.430000,180.630100,194.700000,194.760000,109186404,194.844912,184.902000,174.959088,-10.629983,-13.028949,2.398966,179.964935,175.655045,183.161194,179.444473
3129,2022-12-01 06:00:00,197.080000,191.800000,194.700000,198.920000,80046213,199.538316,187.202000,174.865684,-9.151935,-12.253546,3.101611,185.069931,180.557724,188.329559,184.503098
3130,2022-12-02 06:00:00,191.780000,191.110000,194.860000,196.250000,73645922,202.289723,189.602000,176.914277,-7.876862,-11.378210,3.501347,186.679321,182.108276,189.944611,186.093933


In [None]:
# Summary statistics of the joined frame, inputs and predictions alike.
res.describe()

Unnamed: 0,open,low,close,high,volume,upperband,middleband,lowerband,macd,macdsignal,macdhist,Open + 1 Prediction,Low + 1 Prediction,High + 1 Prediction,Close + 1 Prediction
count,3132.0,3132.0,3132.0,3132.0,3132.0,3128.0,3128.0,3128.0,3099.0,3099.0,3099.0,3099.0,3099.0,3099.0,3099.0
mean,58.338658,56.906177,58.302531,59.682406,93318540.0,61.692462,58.254719,54.816976,0.45213,0.465913,-0.013783,58.899887,57.4599,59.87487,58.679611
std,95.667916,93.195224,95.551473,97.945107,81839040.0,101.357767,95.465997,89.71783,4.96012,4.648482,1.505948,98.570839,96.043327,100.682564,98.354881
min,1.076,0.998667,1.053333,1.108667,0.0,1.208574,1.1176,0.87036,-25.271333,-22.213157,-7.678481,8.561301,8.340777,8.485901,8.433021
25%,8.643833,8.451167,8.590833,8.84095,42145160.0,9.253512,8.6863,7.948599,-0.205286,-0.191364,-0.093232,11.405007,11.141268,11.392679,11.29228
50%,16.159333,15.885667,16.171667,16.437333,75626240.0,16.793932,16.177867,15.366272,0.038275,0.037049,0.003772,12.739985,12.450535,12.753728,12.629384
75%,24.248,23.646333,24.156,24.58,117050100.0,25.246339,24.016067,23.174483,0.559299,0.545762,0.110125,22.646862,22.53647,22.6738,22.437984
max,411.47,405.666667,409.97,414.496667,914081400.0,441.566528,403.096667,389.760189,38.06793,31.285427,10.296174,442.931702,431.833588,451.503937,441.365143
