In [23]:
# YOU CAN SIMPLY RUN THIS FILE TO DOWNLOAD THE LATEST DATA FOR A TICKER

# We are extracting the date and OHLCV (open, high, low, close, volume) of a stock for a given time period.
# This data is meant to be combined with technical analysis (TA) indicators, but it can also be used on its own.

In [24]:
from pathlib import Path

import numpy as np
import pandas as pd
import yfinance as yf
from ta import add_all_ta_features
from ta import momentum
from ta import trend
from ta import volatility
from ta import volume

# Silence NumPy's divide-by-zero and invalid-value warnings, which appear while
# calculating some TA indicators (known issue from the library).
np.seterr(
    divide='ignore',
    invalid='ignore',
)

{'divide': 'ignore', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}

In [25]:
# To add a new TA column, simply follow the examples below. Some indicators expose more than one output method, e.g. MACD provides both macd() and macd_signal().
# Use the ta library documentation to find more indicators for each of the below categories.
# Hover over the TA class to see what data is needed: open, high, low, close, volume
# Some TA classes have extra arguments like window size.

In [26]:
# Momentum
def add_momentum_indicators(df: pd.DataFrame):
    """Add momentum indicator columns to ``df`` and return that same frame.

    Reads the ``Close`` column and writes an ``rsi`` column (RSI with a
    14-row window) directly onto ``df``, so the caller's frame is modified.

    The commented-out lines are optional extras; uncomment any of them to
    add the corresponding column.
    """
    close_prices = df['Close']
    rsi_indicator = momentum.RSIIndicator(close_prices, window=14)
    df['rsi'] = rsi_indicator.rsi()

    # Optional momentum indicators (disabled by default):
    #df['stoch_rsi'] =   momentum.StochRSIIndicator(df['Close'], window=14).stochrsi()
    #df['stoch_rsi_d'] = momentum.StochRSIIndicator(df['Close'], window=14).stochrsi_d()
    #df['stoch_rsi_k'] = momentum.StochRSIIndicator(df['Close'], window=14).stochrsi_k()
    #df['stoch_osc'] =   momentum.StochasticOscillator(df['High'], df['Low'], df['Close'], window=14, smooth_window=3).stoch()
    #df['awesome_osc'] = momentum.AwesomeOscillatorIndicator(df['High'], df['Low'], window1=5, window2=34).awesome_oscillator()

    return df

In [27]:
# Volume
def add_volume_indicators(df: pd.DataFrame):
    """Add volume indicator columns to ``df`` and return that same frame.

    Writes two columns directly onto ``df``:

    * ``obv`` - on-balance volume, built from ``Close`` and ``Volume``.
    * ``adi`` - accumulation/distribution index, built from ``High``,
      ``Low``, ``Close`` and ``Volume``.

    The commented-out lines are optional extras; uncomment any of them to
    add the corresponding column.
    """
    # Optional volume indicators (disabled by default):
    #df['force_index'] =         volume.ForceIndexIndicator(df['Close'], df['Volume'], window=13).force_index()
    #df['ease_move'] =           volume.EaseOfMovementIndicator(df['High'], df['Low'], df['Volume'], window=14).ease_of_movement()
    #df['ease_move_signal'] =    volume.EaseOfMovementIndicator(df['High'], df['Low'], df['Volume'], window=14).sma_ease_of_movement()

    obv_indicator = volume.OnBalanceVolumeIndicator(df['Close'], df['Volume'])
    df['obv'] = obv_indicator.on_balance_volume()

    adi_indicator = volume.AccDistIndexIndicator(
        df['High'],
        df['Low'],
        df['Close'],
        df['Volume'],
    )
    df['adi'] = adi_indicator.acc_dist_index()

    return df

In [28]:
# Volatility
def add_volatility_indicators(df: pd.DataFrame):
    """Add volatility indicator columns to ``df`` and return that same frame.

    Writes the Bollinger Bands of ``Close`` (20-row window) directly onto
    ``df`` as ``bbands_upper``, ``bbands_middle`` and ``bbands_lower``.

    The commented-out line is an optional extra; uncomment it to add the
    corresponding column.
    """
    # Build the indicator once and read all three bands from it, instead of
    # constructing an identical BollingerBands object for every column.
    bollinger = volatility.BollingerBands(df['Close'], window=20)
    df['bbands_upper'] =    bollinger.bollinger_hband()
    df['bbands_middle'] =   bollinger.bollinger_mavg()
    df['bbands_lower'] =    bollinger.bollinger_lband()
    #df['atr'] =             volatility.AverageTrueRange(df['High'], df['Low'], df['Close'], window=14).average_true_range()

    return df

In [29]:
# Trend
def add_trend_indicators(df: pd.DataFrame):
    """Add trend indicator columns to ``df`` and return that same frame.

    Writes the following columns directly onto ``df``:

    * ``macd`` / ``macd_signal`` - MACD of ``Close`` (slow 26, fast 12, signal 9).
    * ``adx`` / ``adx_neg`` / ``adx_pos`` - ADX built from ``High``, ``Low``
      and ``Close`` with a 14-row window.
    * ``ema_50`` / ``sma_50`` - exponential and simple moving averages of
      ``Close`` with a 50-row window.

    The commented-out lines are optional extras; uncomment any of them to
    add the corresponding column.
    """
    close = df['Close']

    # Build each multi-output indicator once and read every output from it,
    # instead of constructing an identical object for every column.
    macd_indicator = trend.MACD(close, window_slow=26, window_fast=12, window_sign=9)
    df['macd'] =        macd_indicator.macd()
    df['macd_signal'] = macd_indicator.macd_signal()
    #df['aroon'] =       trend.AroonIndicator(df['Close'], window=25).aroon_indicator()
    #df['aroon_up'] =    trend.AroonIndicator(df['Close'], window=25).aroon_up()
    #df['aroon_down'] =  trend.AroonIndicator(df['Close'], window=25).aroon_down()

    adx_indicator = trend.ADXIndicator(df['High'], df['Low'], close, window=14)
    df['adx'] =         adx_indicator.adx()
    df['adx_neg'] =     adx_indicator.adx_neg()
    df['adx_pos'] =     adx_indicator.adx_pos()

    df['ema_50'] =      trend.EMAIndicator(close, window=50).ema_indicator()
    #df['ema_200'] =     trend.EMAIndicator(df['Close'], window=200).ema_indicator()
    df['sma_50'] =      trend.SMAIndicator(close, window=50).sma_indicator()
    #df['sma_200'] =     trend.SMAIndicator(df['Close'], window=200).sma_indicator()

    return df

In [30]:
# Download settings: the symbol to fetch, the date range, and where the CSV is written.
ticker = 'SPY'
start_date, end_date = '2013-10-05', '2023-11-05'   # end_date may be set to a future date to download all possible data.
file_location = f'data/ta_stock_data_{ticker}.csv'

In [31]:
# Fetch the price history for `ticker` between the configured dates.
# NOTE: per the yfinance documentation the `end` date is exclusive.
t_hist = yf.download(
    tickers=ticker,
    start=start_date,
    end=end_date,
)

[*********************100%%**********************]  1 of 1 completed


In [32]:
# Fail early with a clear message if the download returned nothing,
# rather than with an obscure error from inside an indicator calculation.
if t_hist.empty:
    raise ValueError(f"No price data was downloaded for {ticker!r}; check the ticker and the date range.")

# Isolating the important columns from dataframe.
# .copy() makes this an independent frame: the indicator helpers add columns to it,
# which would otherwise raise SettingWithCopyWarning on a selection of t_hist.
df: pd.DataFrame = t_hist[['Open', 'High', 'Low', 'Close', 'Volume']].copy()

# NOTE(review): some yfinance versions return two-level (field, ticker) columns even
# for a single ticker; keep only the field level so df['Close'] is a Series.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Add (or don't add) batches of indicators here.
df = add_momentum_indicators(df)
df = add_volume_indicators(df)
df = add_volatility_indicators(df)
df = add_trend_indicators(df)

# df = add_all_ta_features(df, df['Open'], df['High'], df['Low'], df['Close'], df['Volume'])    # Nuclear option if you are lazy.

df = df.round(8)
df = df.dropna()    # important to note that the leading rows are dropped: indicators are NaN until their window has enough data.

print(df.shape)
print(df.isnull().sum())

df

(2489, 18)
Open             0
High             0
Low              0
Close            0
Volume           0
rsi              0
obv              0
adi              0
bbands_upper     0
bbands_middle    0
bbands_lower     0
macd             0
macd_signal      0
adx              0
adx_neg          0
adx_pos          0
ema_50           0
sma_50           0
dtype: int64


Unnamed: 0_level_0,Open,High,Low,Close,Volume,rsi,obv,adi,bbands_upper,bbands_middle,bbands_lower,macd,macd_signal,adx,adx_neg,adx_pos,ema_50,sma_50
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2013-12-16,178.949997,179.809998,178.899994,179.220001,96195000,52.576000,734359100,7.601497e+08,181.928022,179.864500,177.800977,0.630056,1.106733,23.280045,26.679039,25.524777,176.755757,176.543800
2013-12-17,179.380005,179.410004,178.250000,178.649994,89886000,49.699317,644473100,7.322529e+08,181.949131,179.825999,177.702868,0.521836,0.989754,22.236518,28.493765,23.946821,176.830041,176.768200
2013-12-18,178.919998,181.729996,177.320007,181.699997,234906000,61.756939,879379100,9.639630e+08,182.198259,179.959499,177.720739,0.674407,0.926684,21.600656,22.738584,29.735750,177.021019,177.092600
2013-12-19,181.179993,181.699997,180.710007,181.490005,136531200,60.678468,742847900,1.042574e+09,182.334373,180.110500,177.886626,0.769505,0.895249,21.010213,21.679950,28.351351,177.196274,177.410400
2013-12-20,180.690002,181.990005,180.570007,181.559998,197087000,60.923414,939934900,1.120296e+09,182.501799,180.192999,177.884200,0.840827,0.884364,20.638490,20.225425,27.819443,177.367400,177.658200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-30,413.559998,416.679993,412.220001,415.589996,86562700,36.738939,214130300,1.842017e+10,441.554076,425.947499,410.340922,-5.509691,-4.088800,32.069243,33.936833,11.198075,431.368599,434.711401
2023-10-31,416.179993,418.529999,414.209991,418.200012,79665200,40.408677,293795500,1.848766e+10,441.641716,425.778000,409.914284,-5.326879,-4.336416,32.811328,32.058124,12.948862,430.852184,434.288601
2023-11-01,419.200012,423.500000,418.649994,422.660004,98068100,46.156548,391863600,1.855176e+10,441.593774,425.678000,409.762225,-4.767163,-4.422565,32.154781,29.873160,18.457597,430.530922,433.978801
2023-11-02,426.579987,430.920013,426.559998,430.760010,94938900,54.702066,486802500,1.863973e+10,442.047396,425.991000,409.934605,-3.628157,-4.263684,29.996907,26.806682,25.784026,430.539905,433.733401


In [33]:
# to_csv does not create missing folders, so make sure the output directory
# exists first; otherwise a fresh checkout without it fails here.
Path(file_location).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(file_location)