Imports

In [2]:
import os
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import finnhub
import talib as ta

Connect to Finnhub

In [3]:
load_dotenv()
finnhub_api_key = os.getenv('FINNHUB_API_KEY')
fh = finnhub.Client(api_key=finnhub_api_key)

Ensure the right ticker

In [4]:
# Search Finnhub for the recommended ticker
tickers = fh.symbol_lookup('SPDR S&P 500 ETF TRUST')

# Print the top result, SPY ETF
pd.DataFrame(tickers.get('result')).head(1)

Unnamed: 0,description,displaySymbol,symbol,type
0,SPDR S&P 500 ETF TRUST,SPY,SPY,ETP


Get dates for a ten-year window

In [5]:
# Get dates for a historical ten-year window
now = pd.Timestamp.now()
ten_years_ago = now - pd.Timedelta(days=365*10)

# Convert to UNIX timestamp
now_unix = int(pd.Timestamp.timestamp(now))
ten_years_ago_unix = int(pd.Timestamp.timestamp(ten_years_ago))

Get SPY OHLCV data

In [6]:
ohlcv = fh.stock_candles('SPY', 'D', ten_years_ago_unix, now_unix)

Format the data 

In [7]:
# Note: price data is already adjusted by Finnhub
ohlcv = pd.DataFrame(ohlcv)
ohlcv.tail()

Unnamed: 0,c,h,l,o,s,t,v
3646,13737.754101,13813.425255,13717.116514,13758.391689,ok,1658620800,4
3647,13710.278593,13785.798405,13689.682281,13730.874905,ok,1658707200,88
3648,13682.858036,13758.226808,13662.302916,13703.413155,ok,1658793600,19
3649,13707.461144,13730.710354,13634.97831,13676.006329,ok,1658880000,2
3650,13769.144719,13792.498551,13696.335713,13737.548358,ok,1658966400,4


Check for nulls or missing data

In [8]:
# The `s` column indicates the response status
# if there are any issues, the string "no_data" is returned from Finnhub
ohlcv.loc[ohlcv['s'] == 'no_data']

Unnamed: 0,c,h,l,o,s,t,v


In [9]:
# Check for any null rows
ohlcv.isnull().values.any()

False

Drop the status column an create the main DataFrame 'Features'

In [10]:
features = ohlcv.drop(columns='s')
features.columns = ['close', 'high', 'low', 'open', 'timestamp', 'volume']
features.tail()

Unnamed: 0,close,high,low,open,timestamp,volume
3646,13737.754101,13813.425255,13717.116514,13758.391689,1658620800,4
3647,13710.278593,13785.798405,13689.682281,13730.874905,1658707200,88
3648,13682.858036,13758.226808,13662.302916,13703.413155,1658793600,19
3649,13707.461144,13730.710354,13634.97831,13676.006329,1658880000,2
3650,13769.144719,13792.498551,13696.335713,13737.548358,1658966400,4


Derive features with TA Lib

In [20]:
close = features['close']
high = features['high']
low = features['low']

# Hilbert Transform - Instantaneous Trendline
features['trend'] = ta.HT_TRENDLINE(close)

# RSI
features['rsi'] = ta.RSI(close, timeperiod=20)

# Stochastic RSI
features['rsi_fast_k'], features['rsi_fast_d'] = ta.STOCHRSI(
    close, 
    timeperiod=14, 
    fastk_period=5, 
    fastd_period=3, 
    fastd_matype=0
)

# Williams' %R
features['williams_r'] = ta.WILLR(high, low, close, timeperiod=14)

# MACD, MACD Signal, MACD Histogram
features['macd'], features['macd_signal'], features['macd_hist'] = ta.MACD(
    close, 
    fastperiod=12, 
    slowperiod=26, 
    signalperiod=9
)

# Moving Averages
features['ma_20'] = ta.MA(close, timeperiod=20, matype=0)
features['ma_50'] = ta.MA(close, timeperiod=50, matype=0)
features['ma_65'] = ta.MA(close, timeperiod=65, matype=0)
features['ma_200'] = ta.MA(close, timeperiod=200, matype=0)

# Bollinger Bands
features['bb_upp'], features['bb_mid'], features['bb_low'] = ta.BBANDS(
    close, 
    timeperiod=5, 
    nbdevup=2, 
    nbdevdn=2, 
    matype=0
)

In [21]:
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3651 entries, 0 to 3650
Data columns (total 21 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   close        3651 non-null   float64
 1   high         3651 non-null   float64
 2   low          3651 non-null   float64
 3   open         3651 non-null   float64
 4   timestamp    3651 non-null   int64  
 5   volume       3651 non-null   int64  
 6   trend        3588 non-null   float64
 7   rsi          3631 non-null   float64
 8   rsi_fast_k   3631 non-null   float64
 9   rsi_fast_d   3631 non-null   float64
 10  williams_r   3638 non-null   float64
 11  macd         3618 non-null   float64
 12  macd_signal  3618 non-null   float64
 13  macd_hist    3618 non-null   float64
 14  ma_20        3632 non-null   float64
 15  ma_50        3602 non-null   float64
 16  ma_65        3587 non-null   float64
 17  ma_200       3452 non-null   float64
 18  bb_upp       3647 non-null   float64
 19  bb_mid