Imports

In [1]:
import os
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import finnhub
import talib as ta
# import pandas_ta as ta

Connect to Finnhub

In [2]:
load_dotenv()
finnhub_api_key = os.getenv('FINNHUB_API_KEY')
fh = finnhub.Client(api_key=finnhub_api_key)

Ensure the right ticker

In [3]:
# Search Finnhub for the recommended ticker
tickers = fh.symbol_lookup('SPDR S&P 500 ETF TRUST')

# Print the top result, SPY ETF
pd.DataFrame(tickers.get('result')).head(1)

Unnamed: 0,description,displaySymbol,symbol,type
0,SPDR S&P 500 ETF TRUST,SPY,SPY,ETP


Get dates for a ten-year window

In [17]:
# Create datetime objects
end = pd.Timestamp("2022-08-01 00:00:00")
start = end - pd.Timedelta(days=365*10+2) # +2 days for leap years

# Convert datetime objects to UNIX timestamp
end_unix = int(pd.Timestamp.timestamp(end))
start_unix = int(pd.Timestamp.timestamp(start))

Get SPY OHLCV data

In [11]:
ohlcv = fh.stock_candles('SPY', 'D', ten_years_ago_unix, now_unix)

Format the data 

In [12]:
# Note: price data is already adjusted by Finnhub
ohlcv = pd.DataFrame(ohlcv)
ohlcv.tail()

Unnamed: 0,c,h,l,o,s,t,v
3648,10457.110343,10474.846637,10401.814838,10433.114181,ok,1659052800,85
3649,10464.3431,10521.983447,10448.623005,10480.063195,ok,1659139200,5
3650,10483.159005,10500.93948,10427.725759,10459.103068,ok,1659225600,32
3651,10490.409778,10548.193708,10474.650525,10506.169032,ok,1659312000,58
3652,10509.272554,10527.097321,10453.701224,10485.156694,ok,1659398400,23


Check for nulls or missing data

In [13]:
# The `s` column indicates the response status
# if there are any issues, the string "no_data" is returned from Finnhub
ohlcv.loc[ohlcv['s'] == 'no_data']

Unnamed: 0,c,h,l,o,s,t,v


In [14]:
# Check for any null rows
ohlcv.isnull().values.any()

False

Convert timestamp to date and index the date

In [15]:
ohlcv['date'] = pd.to_datetime(ohlcv['t'], unit='s')
ohlcv.set_index(ohlcv['date'], inplace=True)
ohlcv.tail()

Unnamed: 0_level_0,c,h,l,o,s,t,v,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-07-29,10457.110343,10474.846637,10401.814838,10433.114181,ok,1659052800,85,2022-07-29
2022-07-30,10464.3431,10521.983447,10448.623005,10480.063195,ok,1659139200,5,2022-07-30
2022-07-31,10483.159005,10500.93948,10427.725759,10459.103068,ok,1659225600,32,2022-07-31
2022-08-01,10490.409778,10548.193708,10474.650525,10506.169032,ok,1659312000,58,2022-08-01
2022-08-02,10509.272554,10527.097321,10453.701224,10485.156694,ok,1659398400,23,2022-08-02


Drop the status column an create the features DataFrame 'X'

In [16]:
X = ohlcv.drop(columns=['s', 't', 'date'])
X.columns = ['close', 'high', 'low', 'open', 'volume']
X.tail()

Unnamed: 0_level_0,close,high,low,open,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-07-29,10457.110343,10474.846637,10401.814838,10433.114181,85
2022-07-30,10464.3431,10521.983447,10448.623005,10480.063195,5
2022-07-31,10483.159005,10500.93948,10427.725759,10459.103068,32
2022-08-01,10490.409778,10548.193708,10474.650525,10506.169032,58
2022-08-02,10509.272554,10527.097321,10453.701224,10485.156694,23


Add technical analysis features to the DataFrame

In [111]:
close = X['close']
high = X['high']
low = X['low']

# Hilbert Transform - Instantaneous Trendline
X['trend'] = ta.HT_TRENDLINE(close)

# RSI
X['rsi'] = ta.RSI(close, timeperiod=20)

# Stochastic RSI
X['rsi_fast_k'], X['rsi_fast_d'] = ta.STOCHRSI(
    close, 
    timeperiod=14, 
    fastk_period=5, 
    fastd_period=3, 
    fastd_matype=0
)

# Williams' %R
X['williams_r'] = ta.WILLR(high, low, close, timeperiod=14)

# MACD, MACD Signal, MACD Histogram
X['macd'], X['macd_signal'], X['macd_hist'] = ta.MACD(
    close, 
    fastperiod=12, 
    slowperiod=26, 
    signalperiod=9
)

# Moving Averages
X['ma_20'] = ta.MA(close, timeperiod=20, matype=0)
X['ma_50'] = ta.MA(close, timeperiod=50, matype=0)
X['ma_65'] = ta.MA(close, timeperiod=65, matype=0)
X['ma_200'] = ta.MA(close, timeperiod=200, matype=0)

# Bollinger Bands
X['bb_upp'], X['bb_mid'], X['bb_low'] = ta.BBANDS(
    close, 
    timeperiod=5, 
    nbdevup=2, 
    nbdevdn=2, 
    matype=0
)

# Percentage Price Oscillator
X['ppo'] = ta.PPO(close, fastperiod=12, slowperiod=26, matype=0)

# Momentum
X['mom'] = ta.MOM(close, timeperiod=10)

# Rate of Change
X['roc'] = ta.ROC(close, timeperiod=10)

# Exponential Moving Averages
X['ema_20'] = ta.EMA(close, timeperiod=20)
X['ema_50'] = ta.EMA(close, timeperiod=50)
X['ema_65'] = ta.EMA(close, timeperiod=65)
X['ema_200'] = ta.EMA(close, timeperiod=200)




In [112]:
features.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3652 entries, 2012-08-02 to 2022-08-01
Data columns (total 27 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   close        3652 non-null   float64
 1   high         3652 non-null   float64
 2   low          3652 non-null   float64
 3   open         3652 non-null   float64
 4   volume       3652 non-null   int64  
 5   trend        3589 non-null   float64
 6   rsi          3632 non-null   float64
 7   rsi_fast_k   3632 non-null   float64
 8   rsi_fast_d   3632 non-null   float64
 9   williams_r   3639 non-null   float64
 10  macd         3619 non-null   float64
 11  macd_signal  3619 non-null   float64
 12  macd_hist    3619 non-null   float64
 13  ma_20        3633 non-null   float64
 14  ma_50        3603 non-null   float64
 15  ma_65        3588 non-null   float64
 16  ma_200       3453 non-null   float64
 17  bb_upp       3648 non-null   float64
 18  bb_mid       3648 non-null   f

In [113]:
features.tail()

Unnamed: 0_level_0,close,high,low,open,volume,trend,rsi,rsi_fast_k,rsi_fast_d,williams_r,...,bb_upp,bb_mid,bb_low,ppo,mom,roc,ema_20,ema_50,ema_65,ema_200
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-07-28,10209.478296,10226.794582,10155.492229,10186.050381,34,9941.139512,84.246133,100.0,100.0,-5.230812,...,10225.529536,10150.05456,10074.579583,1.353962,126.214288,1.251721,10030.317438,9820.482686,9722.667257,9026.507389
2022-07-29,10255.420949,10272.815158,10201.191944,10231.887607,49,9955.395799,85.679771,100.0,100.0,-5.230812,...,10270.63172,10182.525212,10094.418705,1.318753,165.182759,1.637055,10051.755868,9837.539089,9738.811308,9038.735385
2022-07-30,10301.570343,10319.042826,10247.097308,10277.931101,8,9971.967284,86.936755,100.0,100.0,-5.230812,...,10330.438162,10215.141983,10099.845804,1.273338,231.512629,2.29902,10075.547723,9855.736393,9755.864612,9051.300907
2022-07-31,10347.92741,10365.478519,10293.209246,10324.181791,58,9991.310291,88.046215,100.0,100.0,-5.004193,...,10385.867022,10255.627692,10125.388361,1.259816,298.009811,2.965296,10101.488645,9875.038001,9773.805909,9064.202663
2022-08-01,10355.084649,10412.123172,10339.528688,10370.64061,40,10012.562128,88.208964,100.0,100.0,-14.35391,...,10404.635727,10293.896329,10183.156932,1.283254,287.096322,2.851576,10125.640646,9893.86336,9791.420416,9077.04726
