In [1]:
# !pip install ta
# can use TA-lib also but it is not native to python

# https://medium.com/fintechexplained/automating-stock-investing-technical-analysis-with-python-81c669e360b2
# https://technical-analysis-library-in-python.readthedocs.io/en/latest/
# https://python.plainenglish.io/a-simple-guide-to-plotly-for-plotting-financial-chart-54986c996682

In [2]:
import pandas as pd
from pandas_datareader import data

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from ta.trend import ema_indicator
from ta.trend import MACD
from ta.momentum import rsi
from ta.volatility import BollingerBands

Collecting ta
  Downloading ta-0.7.0.tar.gz (25 kB)
Using legacy 'setup.py install' for ta, since package 'wheel' is not installed.
Installing collected packages: ta
    Running setup.py install for ta: started
    Running setup.py install for ta: finished with status 'done'
Successfully installed ta-0.7.0


You should consider upgrading via the 'c:\users\razer\appdata\local\programs\python\python39\python.exe -m pip install --upgrade pip' command.


In [3]:
tickers = ['SPY']
start_date = '2000-01-01'
end_date = '2021-09-27'

df = data.DataReader(tickers,'yahoo', start_date, end_date)

In [4]:
# check no missing values
df.isnull().sum()

Attributes  Symbols
Adj Close   SPY        0
Close       SPY        0
High        SPY        0
Low         SPY        0
Open        SPY        0
Volume      SPY        0
dtype: int64

In [5]:
# save to/read from csv
# df.to_csv('SPY_2000_to_2021.csv')
# df = pd.read_csv('SPY_2000_to_2021.csv', index_col = 0, header=[0,1])

In [6]:
df.head()

Attributes,Adj Close,Close,High,Low,Open,Volume
Symbols,SPY,SPY,SPY,SPY,SPY,SPY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1999-12-31,98.155968,146.875,147.5,146.25,146.84375,3172700.0
2000-01-03,97.195328,145.4375,148.25,143.875,148.25,8164300.0
2000-01-04,93.394325,139.75,144.0625,139.640625,143.53125,8089800.0
2000-01-05,93.561417,140.0,141.53125,137.25,139.9375,12177900.0
2000-01-06,92.057785,137.75,141.5,137.75,139.625,6227200.0


In [7]:
# calculate RSI based on standard 14-day window 
df[('RSI_14','SPY')] = rsi(close=df[('Close','SPY')], window=14)
df[('RSI_20','SPY')] = rsi(close=df[('Close','SPY')], window=20)

In [8]:
# calculate MACD based on standard MACD line = 12-day - 26-day EMA, and signal line = 9-day EMA
macd = MACD(df[('Close','SPY')], window_slow=26, window_fast=12, window_sign=9)
df[('MACD','SPY')] = macd.macd()
df[('MACD_Signal','SPY')] = macd.macd_signal()
df[('MACD_Diff','SPY')] = macd.macd_diff()

In [9]:
# calculate MA
df[('MA_50','SPY')] = df[('Close','SPY')].rolling(window=50).mean()
df[('MA_100','SPY')] = df[('Close','SPY')].rolling(window=100).mean()
df[('MA_200','SPY')] = df[('Close','SPY')].rolling(window=200).mean()

In [10]:
# calculate EMA
df[('EMA_30','SPY')] = ema_indicator(df[('Close','SPY')], window=30)
df[('EMA_50','SPY')] = ema_indicator(df[('Close','SPY')], window=50)
df[('EMA_100','SPY')] = ema_indicator(df[('Close','SPY')], window=100)
df[('EMA_200','SPY')] = ema_indicator(df[('Close','SPY')], window=200)

In [11]:
# use shorter timeframe for presentation charting
df_sub = df.loc['2020-01-01':'2021-09-27']
df_sub.head()

Attributes,Adj Close,Close,High,Low,Open,Volume,RSI_14,RSI_20,MACD,MACD_Signal,MACD_Diff,MA_50,MA_100,MA_200,EMA_30,EMA_50,EMA_100,EMA_200
Symbols,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
2020-01-02,315.82486,324.869995,324.890015,322.529999,323.540009,59151200.0,74.923361,72.209727,3.324846,3.228434,0.096413,312.254999,303.325999,296.659949,316.795605,312.958869,306.303621,297.996114
2020-01-03,313.43335,322.410004,323.640015,321.100006,321.160004,77709700.0,64.298258,65.040701,3.191316,3.22101,-0.029694,312.722999,303.669399,296.864249,317.157824,313.329501,306.622559,298.239038
2020-01-06,314.62912,323.640015,323.730011,320.359985,320.48999,55653900.0,66.831094,66.776736,3.148451,3.206498,-0.058047,313.198199,303.980299,297.058799,317.57603,313.733835,306.959538,298.491784
2020-01-07,313.744415,322.730011,323.540009,322.23999,323.019989,40496400.0,63.255604,64.290441,3.006395,3.166478,-0.160083,313.645399,304.368599,297.276199,317.908545,314.086627,307.271825,298.73296
2020-01-08,315.416595,324.450012,325.779999,322.670013,322.940002,68296000.0,66.864099,66.753292,2.998044,3.132791,-0.134747,314.102399,304.766599,297.503249,318.330575,314.493034,307.611987,298.988851


In [12]:
# obtain list of empty dates to exclude in chart
dt_all = pd.date_range(start=df_sub.index[0], end=df_sub.index[-1])
dt_obs = [d.strftime("%Y-%m-%d") for d in pd.to_datetime(df_sub.index)]
dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d").tolist() if not d in dt_obs]

In [13]:
# initialise empty plot with 4 subplots
fig = go.Figure()

fig = make_subplots(rows=4, cols=1, shared_xaxes=True,
                   vertical_spacing=0.01,
                   row_heights=[0.5, 0.1, 0.2, 0.2])

# 1st plot
# a) plot price data as candlesticks
fig.add_trace(go.Candlestick(x=df_sub.index,
                             open=df_sub[('Open','SPY')],
                             high=df_sub[('High','SPY')],
                             low=df_sub[('Low','SPY')],
                             close=df_sub[('Close','SPY')],
                             name='Prices',
                             hoverinfo='x+y+z'))

# b) plot moving averages
fig.add_trace(go.Scatter(x=df_sub.index, 
                         y=df_sub[('EMA_50','SPY')], 
                         opacity=0.5, 
                         line=dict(color='blue', width=1.5), 
                         name='EMA 50'))

fig.add_trace(go.Scatter(x=df_sub.index, 
                         y=df_sub[('EMA_200','SPY')], 
                         opacity=0.5, 
                         line=dict(color='orange', width=1.5), 
                         name='EMA 200'))

# 2nd plot
# a) set colours based on open and close prices
colors = ['green' if row[('Open','SPY')] - row[('Close','SPY')] >= 0 
          else 'red' for index, row in df.iterrows()]

# b) plot volume as bar chart
fig.add_trace(go.Bar(x=df_sub.index, 
                     y=df_sub[('Volume','SPY')],
                     marker_color=colors,
                     name='Volume'
                    ), row=2, col=1)

# 3rd plot
# a) set MACD bar colour based on values
colors = ['green' if value >= 0 
          else 'red' for value in df_sub[('MACD_Diff','SPY')]]

# b) plot MACD bar
fig.add_trace(go.Bar(x=df_sub.index, 
                     y=df_sub[('MACD_Diff','SPY')],
                     marker_color=colors,
                     name='MACD Diff'
                    ), row=3, col=1)

# c) plot MACD line
fig.add_trace(go.Scatter(x=df_sub.index,
                         y=df_sub[('MACD','SPY')],
                         line=dict(color='black', width=0.75),
                         name='MACD'
                        ), row=3, col=1)

# d) plot MACD signal line
fig.add_trace(go.Scatter(x=df_sub.index,
                         y=df_sub[('MACD_Signal','SPY')],
                         line=dict(color='blue', width=0.5),
                         name='MACD Signal'
                        ), row=3, col=1)

# 4th plot
# a) plot RSI
fig.add_trace(go.Scatter(x=df_sub.index,
                         y=df_sub[('RSI_14','SPY')],
                         line=dict(color='blue', width=0.75),
                         name='RSI 14'
                        ), row=4, col=1)

fig.add_trace(go.Scatter(x=df_sub.index,
                         y=df_sub[('RSI_20','SPY')],
                         line=dict(color='black', width=0.75),
                         name='RSI 20'
                        ), row=4, col=1)

# b) plot RSI oversold/overbought regions
fig.add_hrect(y0=0, y1=30, line_width=0, fillcolor='palegreen', opacity=0.2, row=4, col=1)
fig.add_hrect(y0=70, y1=100, line_width=0, fillcolor='salmon', opacity=0.2, row=4, col=1)

# name subplots
fig.update_yaxes(title_text='Prices', row=1, col=1)
fig.update_yaxes(title_text='Volume', row=2, col=1)
fig.update_yaxes(title_text='MACD', row=3, col=1)
fig.update_yaxes(title_text='RSI', range=[0,100], row=4, col=1)

# add chart title
# remove empty dates, range slider, legend
fig.update_layout(xaxis_rangebreaks=[dict(values=dt_breaks)],
                  xaxis_rangeslider_visible = False,
                  margin=dict(l=20, r=20, t=30, b=20),
                  showlegend=False,
                  hovermode='x unified',
                  title='SPY stock price from 2020 to 2021')

In [14]:
# binary classifier for win/loss condition
df[('Price_Diff','SPY')] = df[('Close','SPY')].shift(21) - df[('Close','SPY')]
df[('Win_Loss','SPY')] = df[('Price_Diff','SPY')].apply(lambda x: 1 if x>0 else 0)
df.drop(columns=('Price_Diff','SPY'), inplace=True)

In [15]:
df.tail()

Attributes,Adj Close,Close,High,Low,Open,Volume,RSI_14,RSI_20,MACD,MACD_Signal,MACD_Diff,MA_50,MA_100,MA_200,EMA_30,EMA_50,EMA_100,EMA_200,Win_Loss
Symbols,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY,SPY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
2021-09-21,433.630005,433.630005,437.910004,433.070007,436.529999,92526100.0,32.628156,39.075475,-1.191917,1.012817,-2.204735,442.621799,432.086399,410.012999,444.084664,441.297797,431.420678,410.578935,1
2021-09-22,437.859985,437.859985,440.029999,433.75,436.049988,102350100.0,40.773831,44.532155,-1.528129,0.504628,-2.032757,442.667199,432.291999,410.353049,443.683071,441.162981,431.548189,410.850388,1
2021-09-23,443.179993,443.179993,444.890015,439.600006,439.850006,76396000.0,49.107908,50.411967,-1.349739,0.133755,-1.483494,442.805999,432.541799,410.723499,443.650615,441.242079,431.778522,411.172076,1
2021-09-24,443.910004,443.910004,444.670013,441.209991,441.440002,62025800.0,50.144619,51.159781,-1.13636,-0.120268,-1.016091,442.989199,432.824699,411.092199,443.66735,441.346704,432.018749,411.497826,1
2021-09-27,442.640015,442.640015,444.049988,441.899994,442.809998,61267400.0,48.301214,49.784912,-1.057541,-0.307723,-0.749819,443.2152,433.093599,411.471149,443.60107,441.397422,432.229071,411.807699,1
