Imports

In [1]:
import os
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import finnhub
import yfinance as yf
import talib as ta

Get dates for a ten-year window and convert to UNIX Timestamp integers

In [2]:
# Create datetime objects
end = pd.Timestamp("2022-08-01 23:59:00")
start = end - pd.Timedelta(days=365*10+2) # +2 leap days

# Convert datetime objects to UNIX timestamp
end_unix = int(pd.Timestamp.timestamp(end))
start_unix = int(pd.Timestamp.timestamp(start))

Get SPY data from FinnHub

In [3]:
# Connect to FinnHub API
load_dotenv()
finnhub_api_key = os.getenv('FINNHUB_API_KEY')
fh = finnhub.Client(api_key=finnhub_api_key)

# Note: price data is already adjusted by Finnhub
spy_ohlcv = fh.stock_candles('SPY', 'D', start_unix, end_unix)
spy_ohlcv = pd.DataFrame(spy_ohlcv)
spy_ohlcv.tail()

Unnamed: 0,c,h,l,o,s,t,v
2511,390.89,394.06,389.95,393.84,ok,1658793600,52946393
2512,401.04,402.88,394.05,394.36,ok,1658880000,82342106
2513,406.07,406.8,398.15,401.89,ok,1658966400,73966563
2514,411.99,413.03,406.77,407.58,ok,1659052800,87003672
2515,410.77,413.41,408.4,409.15,ok,1659312000,69997471


Check for nulls or missing data

In [4]:
# The `s` column indicates the response status
# Finnhub returns the string "no_data" for missing rows
spy_ohlcv.loc[spy_ohlcv['s'] == 'no_data']

Unnamed: 0,c,h,l,o,s,t,v


In [5]:
# Check for any null rows
spy_ohlcv.isnull().values.any()

False

Convert UNIX timestamp to date objects and index the date

In [6]:
spy_ohlcv['date'] = pd.to_datetime(spy_ohlcv['t'], unit='s')
spy_ohlcv = spy_ohlcv.set_index(spy_ohlcv['date'])
spy_ohlcv.tail()

Unnamed: 0_level_0,c,h,l,o,s,t,v,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-07-26,390.89,394.06,389.95,393.84,ok,1658793600,52946393,2022-07-26
2022-07-27,401.04,402.88,394.05,394.36,ok,1658880000,82342106,2022-07-27
2022-07-28,406.07,406.8,398.15,401.89,ok,1658966400,73966563,2022-07-28
2022-07-29,411.99,413.03,406.77,407.58,ok,1659052800,87003672,2022-07-29
2022-08-01,410.77,413.41,408.4,409.15,ok,1659312000,69997471,2022-08-01


Drop columns and create the features DataFrame 'X'

In [7]:
X = spy_ohlcv.drop(columns=['s', 't', 'date'])
X.columns = ['close', 'high', 'low', 'open', 'volume']

Check dates

In [8]:
display(X.shape, X.head(), X.tail())

(2516, 5)

Unnamed: 0_level_0,close,high,low,open,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-08-01,137.59,138.73,137.4,138.7,138293740
2012-08-02,136.64,137.57,135.58,136.55,199556580
2012-08-03,139.349,139.64,136.6794,138.56,157824975
2012-08-06,139.62,140.17,139.56,139.72,86327738
2012-08-07,140.32,140.92,140.03,140.18,109545089


Unnamed: 0_level_0,close,high,low,open,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-07-26,390.89,394.06,389.95,393.84,52946393
2022-07-27,401.04,402.88,394.05,394.36,82342106
2022-07-28,406.07,406.8,398.15,401.89,73966563
2022-07-29,411.99,413.03,406.77,407.58,87003672
2022-08-01,410.77,413.41,408.4,409.15,69997471


Add technical analysis features to the DataFrame

In [9]:
close = X['close']
high = X['high']
low = X['low']

# Hilbert Transform - Instantaneous Trendline
X['trend'] = ta.HT_TRENDLINE(close)

# RSI
X['rsi'] = ta.RSI(close, timeperiod=20)

# Stochastic RSI
X['rsi_fast_k'], X['rsi_fast_d'] = ta.STOCHRSI(
    close, 
    timeperiod=14, 
    fastk_period=5, 
    fastd_period=3, 
    fastd_matype=0
)

# Williams' %R
X['williams_r'] = ta.WILLR(high, low, close, timeperiod=14)

# MACD, MACD Signal, MACD Histogram
X['macd'], X['macd_signal'], X['macd_hist'] = ta.MACD(
    close, 
    fastperiod=12, 
    slowperiod=26, 
    signalperiod=9
)

# Moving Averages
X['ma_20'] = ta.MA(close, timeperiod=20, matype=0)
X['ma_50'] = ta.MA(close, timeperiod=50, matype=0)
X['ma_65'] = ta.MA(close, timeperiod=65, matype=0)
X['ma_200'] = ta.MA(close, timeperiod=200, matype=0)

# Bollinger Bands
X['bb_upp'], X['bb_mid'], X['bb_low'] = ta.BBANDS(
    close, 
    timeperiod=5, 
    nbdevup=2, 
    nbdevdn=2, 
    matype=0
)

# Percentage Price Oscillator
X['ppo'] = ta.PPO(close, fastperiod=12, slowperiod=26, matype=0)

# Momentum
X['mom'] = ta.MOM(close, timeperiod=10)

# Rate of Change
X['roc'] = ta.ROC(close, timeperiod=10)

# Exponential Moving Averages
X['ema_20'] = ta.EMA(close, timeperiod=20)
X['ema_50'] = ta.EMA(close, timeperiod=50)
X['ema_65'] = ta.EMA(close, timeperiod=65)
X['ema_200'] = ta.EMA(close, timeperiod=200)

Get Indices data from Yahoo!

In [10]:
indices_ohlcv = yf.download('^GSPC ^NDX ^DJI ^DJT', start='2012-08-01', end='2022-08-02')
indices_close = indices_ohlcv['Adj Close']
indices_close.tail()

[*********************100%***********************]  4 of 4 completed


Unnamed: 0_level_0,^DJI,^DJT,^GSPC,^NDX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-07-26,31761.539062,13614.139648,3921.050049,12086.900391
2022-07-27,32197.589844,13847.009766,4023.610107,12601.469727
2022-07-28,32529.630859,14275.299805,4072.429932,12717.870117
2022-07-29,32845.128906,14609.0,4130.290039,12947.969727
2022-08-01,32798.398438,14634.089844,4118.629883,12940.780273


Check for nulls

In [11]:
indices_close.isnull().values.any()

False

Format column Names

In [12]:
indices_close.columns = ['dji', 'djt', 'gspc', 'ndx']

Check shape and dates

In [13]:
display(indices_close.shape, indices_close.head(), indices_close.tail())

(2516, 4)

Unnamed: 0_level_0,dji,djt,gspc,ndx
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-08-01,12976.129883,4986.709961,1375.319946,2635.129883
2012-08-02,12878.879883,4984.149902,1365.0,2625.52002
2012-08-03,13096.169922,5086.310059,1390.98999,2676.0
2012-08-06,13117.509766,5082.359863,1394.22998,2694.090088
2012-08-07,13168.599609,5092.459961,1401.349976,2717.159912


Unnamed: 0_level_0,dji,djt,gspc,ndx
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-07-26,31761.539062,13614.139648,3921.050049,12086.900391
2022-07-27,32197.589844,13847.009766,4023.610107,12601.469727
2022-07-28,32529.630859,14275.299805,4072.429932,12717.870117
2022-07-29,32845.128906,14609.0,4130.290039,12947.969727
2022-08-01,32798.398438,14634.089844,4118.629883,12940.780273


In [14]:
X.join(indices_close, on=X.index)

Unnamed: 0_level_0,close,high,low,open,volume,trend,rsi,rsi_fast_k,rsi_fast_d,williams_r,...,mom,roc,ema_20,ema_50,ema_65,ema_200,dji,djt,gspc,ndx
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-08-01,137.590,138.73,137.4000,138.70,138293740,,,,,,...,,,,,,,12976.129883,4986.709961,1375.319946,2635.129883
2012-08-02,136.640,137.57,135.5800,136.55,199556580,,,,,,...,,,,,,,12878.879883,4984.149902,1365.000000,2625.520020
2012-08-03,139.349,139.64,136.6794,138.56,157824975,,,,,,...,,,,,,,13096.169922,5086.310059,1390.989990,2676.000000
2012-08-06,139.620,140.17,139.5600,139.72,86327738,,,,,,...,,,,,,,13117.509766,5082.359863,1394.229980,2694.090088
2012-08-07,140.320,140.92,140.0300,140.18,109545089,,,,,,...,,,,,,,13168.599609,5092.459961,1401.349976,2717.159912
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-26,390.890,394.06,389.9500,393.84,52946393,385.898000,49.785606,0.000000,34.261989,-31.880577,...,10.06,2.641599,388.656472,394.164873,398.402067,418.154507,31761.539062,13614.139648,3921.050049,12086.900391
2022-07-27,401.040,402.88,394.0500,394.36,82342106,386.804389,55.163433,98.608208,42.891288,-5.778894,...,22.21,5.862788,389.835856,394.434486,398.482005,417.984213,32197.589844,13847.009766,4023.610107,12601.469727
2022-07-28,406.070,406.80,398.1500,401.89,73966563,387.952778,57.535789,100.000000,66.202736,-2.041387,...,28.16,7.451510,391.381965,394.890781,398.711944,417.865664,32529.630859,14275.299805,4072.429932,12717.870117
2022-07-29,411.990,413.03,406.7700,407.58,87003672,389.536980,60.148111,100.000000,99.536069,-2.476780,...,26.86,6.974268,393.344635,395.561338,399.114309,417.807200,32845.128906,14609.000000,4130.290039,12947.969727


### DataFrame Overview

In [15]:
print(X.shape)
display(X.head())
display(X.tail())

(2516, 27)


Unnamed: 0_level_0,close,high,low,open,volume,trend,rsi,rsi_fast_k,rsi_fast_d,williams_r,...,bb_upp,bb_mid,bb_low,ppo,mom,roc,ema_20,ema_50,ema_65,ema_200
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-08-01,137.59,138.73,137.4,138.7,138293740,,,,,,...,,,,,,,,,,
2012-08-02,136.64,137.57,135.58,136.55,199556580,,,,,,...,,,,,,,,,,
2012-08-03,139.349,139.64,136.6794,138.56,157824975,,,,,,...,,,,,,,,,,
2012-08-06,139.62,140.17,139.56,139.72,86327738,,,,,,...,,,,,,,,,,
2012-08-07,140.32,140.92,140.03,140.18,109545089,,,,,,...,141.441336,138.7038,135.966264,,,,,,,


Unnamed: 0_level_0,close,high,low,open,volume,trend,rsi,rsi_fast_k,rsi_fast_d,williams_r,...,bb_upp,bb_mid,bb_low,ppo,mom,roc,ema_20,ema_50,ema_65,ema_200
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-07-26,390.89,394.06,389.95,393.84,52946393,385.898,49.785606,0.0,34.261989,-31.880577,...,400.053078,395.022,389.990922,0.979949,10.06,2.641599,388.656472,394.164873,398.402067,418.154507
2022-07-27,401.04,402.88,394.05,394.36,82342106,386.804389,55.163433,98.608208,42.891288,-5.778894,...,403.200156,396.276,389.351844,0.988895,22.21,5.862788,389.835856,394.434486,398.482005,417.984213
2022-07-28,406.07,406.8,398.15,401.89,73966563,387.952778,57.535789,100.0,66.202736,-2.041387,...,408.27458,397.732,387.18942,1.221374,28.16,7.45151,391.381965,394.890781,398.711944,417.865664
2022-07-29,411.99,413.03,406.77,407.58,87003672,389.53698,60.148111,100.0,99.536069,-2.47678,...,416.028323,401.112,386.195677,1.556088,26.86,6.974268,393.344635,395.561338,399.114309,417.8072
2022-08-01,410.77,413.41,408.4,409.15,69997471,391.093075,59.356006,90.899678,96.966559,-6.230824,...,419.500644,404.152,388.803356,1.931096,28.82,7.54549,395.004193,396.157756,399.467512,417.737178
