Imports

In [2]:
import os
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import finnhub
import talib as ta

Connect to Finnhub

In [3]:
# Load variables from a local .env file (if one exists) into the environment,
# then read the Finnhub API key from there.
load_dotenv()
finnhub_api_key = os.getenv('FINNHUB_API_KEY')

# Fail fast with a clear message: os.getenv returns None for an unset variable,
# and passing that on would only surface later as a failed API request.
if not finnhub_api_key:
    raise RuntimeError(
        "FINNHUB_API_KEY is not set; add it to your .env file or environment"
    )

# Client used for every Finnhub request in the cells below
fh = finnhub.Client(api_key=finnhub_api_key)

Confirm the ticker symbol for the SPDR S&P 500 ETF

In [4]:
# Look the fund up by its full name to see which symbol Finnhub lists it under
tickers = fh.symbol_lookup('SPDR S&P 500 ETF TRUST')

# Tabulate the matches and show only the first one, the SPY ETF
matches = pd.DataFrame(tickers.get('result'))
matches.iloc[:1]

Unnamed: 0,description,displaySymbol,symbol,type
0,SPDR S&P 500 ETF TRUST,SPY,SPY,ETP


Get dates for a ten-year window and convert to UNIX Timestamp integers

In [5]:
# End of the window: the last minute of 1 August 2022
end = pd.Timestamp("2022-08-01 23:59:00")

# Step back ten calendar years. DateOffset handles leap days itself, so no
# hand-counted day correction is needed and the window stays exactly ten
# years long if `end` is ever changed.
start = end - pd.DateOffset(years=10)

# Convert to whole-second UNIX timestamps
# (pandas treats timezone-naive Timestamps as UTC here)
end_unix = int(end.timestamp())
start_unix = int(start.timestamp())

Get OHLCV data for ticker SPY

In [6]:
# Request 'D'-resolution candles for SPY between start_unix and end_unix
candle_symbol, candle_resolution = 'SPY', 'D'
ohlcv = fh.stock_candles(candle_symbol, candle_resolution, start_unix, end_unix)

Format and preview the data 

In [7]:
# Note: price data is already adjusted by Finnhub

# Check the response status before building the frame. A failed request comes
# back as a dict holding only a scalar status (e.g. {'s': 'no_data'}), which
# pd.DataFrame rejects with an unrelated-looking "all scalar values" error.
# The isinstance guard keeps this cell safe to re-run once `ohlcv` is a DataFrame.
if isinstance(ohlcv, dict) and ohlcv.get('s') != 'ok':
    raise ValueError(
        f"Finnhub returned status {ohlcv.get('s')!r} for the SPY candle request"
    )

# One column per response key (c, h, l, o, s, t, v); preview the latest rows
ohlcv = pd.DataFrame(ohlcv)
ohlcv.tail()

Unnamed: 0,c,h,l,o,s,t,v
2511,390.89,394.06,389.95,393.84,ok,1658793600,52946393
2512,401.04,402.88,394.05,394.36,ok,1658880000,82342106
2513,406.07,406.8,398.15,401.89,ok,1658966400,73966563
2514,411.99,413.03,406.77,407.58,ok,1659052800,87003672
2515,410.77,413.41,408.4,409.15,ok,1659312000,69997471


Check for nulls or missing data

In [8]:
# Finnhub reports the response status in the `s` column and uses the string
# "no_data" to flag a problem, so list any rows carrying that status
no_data_mask = ohlcv['s'].eq('no_data')
ohlcv[no_data_mask]

Unnamed: 0,c,h,l,o,s,t,v


In [9]:
# True if at least one cell anywhere in the frame is missing
ohlcv.isna().to_numpy().any()

False

Convert the UNIX timestamps to datetimes and use them as the index

In [10]:
# Parse the epoch-second values in `t` and keep them as a regular 'date' column
parsed_dates = pd.to_datetime(ohlcv['t'], unit='s')
ohlcv['date'] = parsed_dates

# Index by that column as well; drop=False leaves the 'date' column in the frame
ohlcv = ohlcv.set_index('date', drop=False)

Drop columns and create the features DataFrame 'X'

In [11]:
# Build the feature frame: drop the status, raw-timestamp and 'date' columns
# (the dates remain available as the index), then give the single-letter
# Finnhub columns readable names.
# Renaming by explicit mapping, rather than assigning a positional list, means
# a change in the response's column order cannot silently mislabel prices;
# errors='raise' turns a missing expected column into an immediate KeyError.
X = (
    ohlcv
    .drop(columns=['s', 't', 'date'])
    .rename(
        columns={
            'c': 'close',
            'h': 'high',
            'l': 'low',
            'o': 'open',
            'v': 'volume',
        },
        errors='raise',
    )
)
X.tail()

Unnamed: 0_level_0,close,high,low,open,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-07-26,390.89,394.06,389.95,393.84,52946393
2022-07-27,401.04,402.88,394.05,394.36,82342106
2022-07-28,406.07,406.8,398.15,401.89,73966563
2022-07-29,411.99,413.03,406.77,407.58,87003672
2022-08-01,410.77,413.41,408.4,409.15,69997471


Add technical analysis features to the DataFrame

In [12]:
# Price series that feed the indicators below
close, high, low = X['close'], X['high'], X['low']

# Hilbert Transform - Instantaneous Trendline
X['trend'] = ta.HT_TRENDLINE(close)

# RSI with a timeperiod of 20
X['rsi'] = ta.RSI(close, timeperiod=20)

# Stochastic RSI: the call returns two series, stored as fast K and fast D
stoch_k, stoch_d = ta.STOCHRSI(
    close, timeperiod=14, fastk_period=5, fastd_period=3, fastd_matype=0
)
X['rsi_fast_k'] = stoch_k
X['rsi_fast_d'] = stoch_d

# Williams' %R
X['williams_r'] = ta.WILLR(high, low, close, timeperiod=14)

# MACD, MACD Signal, MACD Histogram
macd_line, macd_signal_line, macd_histogram = ta.MACD(
    close, fastperiod=12, slowperiod=26, signalperiod=9
)
X['macd'] = macd_line
X['macd_signal'] = macd_signal_line
X['macd_hist'] = macd_histogram

# Moving Averages: one column per look-back period
for ma_column, ma_period in {'ma_20': 20, 'ma_50': 50, 'ma_65': 65, 'ma_200': 200}.items():
    X[ma_column] = ta.MA(close, timeperiod=ma_period, matype=0)

# Bollinger Bands: upper, middle and lower band
band_upper, band_middle, band_lower = ta.BBANDS(
    close, timeperiod=5, nbdevup=2, nbdevdn=2, matype=0
)
X['bb_upp'] = band_upper
X['bb_mid'] = band_middle
X['bb_low'] = band_lower

# Percentage Price Oscillator
X['ppo'] = ta.PPO(close, fastperiod=12, slowperiod=26, matype=0)

# Momentum
X['mom'] = ta.MOM(close, timeperiod=10)

# Rate of Change
X['roc'] = ta.ROC(close, timeperiod=10)

# Exponential Moving Averages: same look-back periods as the moving averages above
for ema_column, ema_period in {'ema_20': 20, 'ema_50': 50, 'ema_65': 65, 'ema_200': 200}.items():
    X[ema_column] = ta.EMA(close, timeperiod=ema_period)

In [13]:
# Preview the last five rows now that the indicator columns are attached
X.tail(5)

Unnamed: 0_level_0,close,high,low,open,volume,trend,rsi,rsi_fast_k,rsi_fast_d,williams_r,...,bb_upp,bb_mid,bb_low,ppo,mom,roc,ema_20,ema_50,ema_65,ema_200
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-07-26,390.89,394.06,389.95,393.84,52946393,385.898,49.785606,0.0,34.261989,-31.880577,...,400.053078,395.022,389.990922,0.979949,10.06,2.641599,388.656472,394.164873,398.402067,418.154507
2022-07-27,401.04,402.88,394.05,394.36,82342106,386.804389,55.163433,98.608208,42.891288,-5.778894,...,403.200156,396.276,389.351844,0.988895,22.21,5.862788,389.835856,394.434486,398.482005,417.984213
2022-07-28,406.07,406.8,398.15,401.89,73966563,387.952778,57.535789,100.0,66.202736,-2.041387,...,408.27458,397.732,387.18942,1.221374,28.16,7.45151,391.381965,394.890781,398.711944,417.865664
2022-07-29,411.99,413.03,406.77,407.58,87003672,389.53698,60.148111,100.0,99.536069,-2.47678,...,416.028323,401.112,386.195677,1.556088,26.86,6.974268,393.344635,395.561338,399.114309,417.8072
2022-08-01,410.77,413.41,408.4,409.15,69997471,391.093075,59.356006,90.899678,96.966559,-6.230824,...,419.500644,404.152,388.803356,1.931096,28.82,7.54549,395.004193,396.157756,399.467512,417.737178


Add bond yields as features

In [14]:
# Write the feature frame to a CSV file (relative path, so it lands in the working directory)
features_csv = 'X.csv'
X.to_csv(features_csv)