Imports

In [219]:
import os
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import finnhub
import talib as ta

Connect to Finnhub

In [190]:
# Load variables from a local .env file (if one exists) into the environment
load_dotenv()

# Read the key from the environment so it never appears in the notebook itself
finnhub_api_key = os.getenv('FINNHUB_API_KEY')

# os.getenv returns None when the variable is unset; stop here with a clear
# message rather than handing an empty key to the client
if not finnhub_api_key:
    raise RuntimeError(
        'FINNHUB_API_KEY is not set; add it to your environment or .env file'
    )

fh = finnhub.Client(api_key=finnhub_api_key)

Ensure the right ticker

In [200]:
# Look the fund up by its full name to confirm which symbol Finnhub uses
tickers = fh.symbol_lookup('SPDR S&P 500 ETF TRUST')

# Show only the first match, which is expected to be the SPY ETF
pd.DataFrame(data=tickers.get('result')).iloc[:1]

Unnamed: 0,description,displaySymbol,symbol,type
0,SPDR S&P 500 ETF TRUST,SPY,SPY,ETP


Get dates for a ten-year window

In [192]:
# Build the request window in UTC. Timestamp.timestamp() interprets a naive
# Timestamp as UTC, so a naive local "now" would be shifted by the machine's
# UTC offset once converted to epoch seconds.
now = pd.Timestamp.now(tz='UTC')

# Step back ten calendar years; a fixed 365*10 days falls short by the leap days
ten_years_ago = now - pd.DateOffset(years=10)

# Convert both ends of the window to whole-second UNIX timestamps
now_unix = int(now.timestamp())
ten_years_ago_unix = int(ten_years_ago.timestamp())

Get SPY OHLCV data

In [193]:
# Daily ('D') SPY candles between the two UNIX timestamps computed above
ohlcv = fh.stock_candles(
    'SPY',
    'D',
    ten_years_ago_unix,
    now_unix,
)

Format the data 

In [194]:
# Validate the raw response before building the frame: a response without
# candles carries only a scalar status, which pd.DataFrame cannot turn into
# rows and would reject with an unrelated-looking ValueError.
# The isinstance guard keeps this cell safe to re-run once `ohlcv` has already
# been converted to a DataFrame.
if isinstance(ohlcv, dict) and ohlcv.get('s') != 'ok':
    raise ValueError(
        f"Finnhub returned no candle data (status: {ohlcv.get('s')!r})"
    )

# One row per candle; the scalar status is broadcast to every row
ohlcv = pd.DataFrame(ohlcv)
ohlcv.tail()

Unnamed: 0,c,h,l,o,s,t,v
2510,398.79,398.84,391.63,394.16,ok,1658361600,64903856
2511,395.09,400.18,392.75,398.92,ok,1658448000,72197332
2512,395.57,396.47,393.21,395.75,ok,1658707200,53631485
2513,390.89,394.06,389.95,393.84,ok,1658793600,52946393
2514,401.04,402.88,394.05,394.36,ok,1658880000,82342106


Check for nulls or missing data

In [195]:
# `s` carries the response status reported by Finnhub.
# List every row flagged "no_data"; an empty result means none were flagged.
ohlcv[ohlcv['s'].eq('no_data')]

Unnamed: 0,c,h,l,o,s,t,v


In [196]:
# True if at least one cell anywhere in the frame is missing
ohlcv.isna().to_numpy().any()

False

Drop the status column and create the main DataFrame, `features`

In [202]:
# Drop the status column and give the single-letter fields readable names.
# Renaming through an explicit mapping ties each label to its source column,
# so the result stays correct even if the response's column order changes
# (a positional `.columns = [...]` assignment would silently mislabel prices).
features = ohlcv.drop(columns='s').rename(
    columns={
        'c': 'close',
        'h': 'high',
        'l': 'low',
        'o': 'open',
        't': 'timestamp',
        'v': 'volume',
    }
)
features.tail()

Unnamed: 0,close,high,low,open,timestamp,volume
2510,398.79,398.84,391.63,394.16,1658361600,64903856
2511,395.09,400.18,392.75,398.92,1658448000,72197332
2512,395.57,396.47,393.21,395.75,1658707200,53631485
2513,390.89,394.06,389.95,393.84,1658793600,52946393
2514,401.04,402.88,394.05,394.36,1658880000,82342106


Derive features with TA Lib

In [217]:
# Every indicator below is derived from the closing price series
close = features['close']

# Hilbert Transform instantaneous trendline
features['trend'] = ta.HT_TRENDLINE(close)

# Relative Strength Index over a 20-period window
features['rsi'] = ta.RSI(close, timeperiod=20)

# Stochastic RSI returns two series (fast %K, fast %D); store each as a column
stoch_k, stoch_d = ta.STOCHRSI(
    close,
    timeperiod=14,
    fastk_period=5,
    fastd_period=3,
    fastd_matype=0,
)
features['rsi_fast_k'] = stoch_k
features['rsi_fast_d'] = stoch_d

In [218]:
# Show the first 65 rows: the indicator columns start out as NaN, and this
# many rows are needed to see the first populated `trend` value
features.iloc[:65]

Unnamed: 0,close,high,low,open,timestamp,volume,trend,rsi,rsi_fast_k,rsi_fast_d
0,138.680,139.339,138.2700,138.52,1343606400,106781926,,,,
1,137.710,138.870,137.7100,138.49,1343692800,120575835,,,,
2,137.590,138.730,137.4000,138.70,1343779200,138293740,,,,
3,136.640,137.570,135.5800,136.55,1343865600,199556580,,,,
4,139.349,139.640,136.6794,138.56,1343952000,157824975,,,,
...,...,...,...,...,...,...,...,...,...,...
60,141.420,142.060,140.8300,141.86,1350950400,192056293,,42.547036,0.000000,0.239554
61,141.020,142.100,140.8000,141.93,1351036800,120179305,,41.441927,0.000000,0.239554
62,141.430,142.280,140.5700,142.02,1351123200,134457331,,43.038245,26.377233,8.792411
63,141.350,141.840,140.3900,141.30,1351209600,146023428,144.089082,42.798613,23.002475,16.459902
