The objective of this notebook is to z-score normalise time-series stock data (accounting for look-ahead bias, etc.).

In [89]:
import datetime as dt
import pandas as pd
from pandas_datareader import data as pdr
import plotly.offline as pyo
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Notebook-wide plotting setup: start plotly's offline notebook mode, then
# make plotly the backend used by pandas' own `.plot()` calls.
pyo.init_notebook_mode(connected=True)
pd.set_option('plotting.backend', 'plotly')

In [90]:
# Workaround for a pandas_datareader Yahoo download error: presumably
# pdr_override() makes pdr.get_data_yahoo fetch through yfinance instead
# (verify against the installed yfinance version).
import yfinance as yfin

yfin.pdr_override()

# Date range requested for every ticker in this notebook.
start = dt.datetime(year=2006, month=1, day=1)
end = dt.datetime(year=2023, month=1, day=1)

# Daily OHLCV history for BHP on the ASX.
df = pdr.get_data_yahoo('BHP.AX', start=start, end=end)

df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006-01-03,21.286575,21.660353,21.267887,21.660353,9.715583,7280093
2006-01-04,22.024784,22.333151,21.950029,22.286428,9.996402,16274002
2006-01-05,22.305117,22.305117,22.024784,22.052818,9.891621,11436552
2006-01-06,21.912651,21.912651,21.772486,21.819208,9.786836,7104800
2006-01-09,22.211674,22.520039,22.174295,22.48266,10.084421,14577664


In [91]:
# Rolling statistics of the BHP close price. With min_periods=0 the windows
# emit a value from the very first row, so early rows are computed from fewer
# than 50 / 200 observations (the std of a single observation is NaN).
closes = df['Close']
roll_200 = closes.rolling(window=200, min_periods=0)

# moving averages
df['MA50'] = closes.rolling(window=50, min_periods=0).mean()
df['MA200'] = roll_200.mean()

# moving standard deviation
df['MD200'] = roll_200.std()

df['MA200'].head(10), df['MD200'].head(10)


(Date
 2006-01-03    21.660353
 2006-01-04    21.973391
 2006-01-05    21.999866
 2006-01-06    21.954702
 2006-01-09    22.060294
 2006-01-10    22.097983
 2006-01-11    22.128908
 2006-01-12    22.142758
 2006-01-13    22.146262
 2006-01-16    22.150000
 Name: MA200, dtype: float64,
 Date
 2006-01-03         NaN
 2006-01-04    0.442702
 2006-01-05    0.316379
 2006-01-06    0.273660
 2006-01-09    0.334538
 2006-01-10    0.313138
 2006-01-11    0.297334
 2006-01-12    0.278051
 2006-01-13    0.260305
 2006-01-16    0.245702
 Name: MD200, dtype: float64)

In [92]:
# Same date range as BHP: df2 holds FMG, df3 holds RIO (downloaded in that order).
df2, df3 = (
    pdr.get_data_yahoo(ticker, start=start, end=end)
    for ticker in ('FMG.AX', 'RIO.AX')
)


def z_norm(data, w=200, col='Close', min_periods=0):
    """Compute a rolling z-score of a price column.

    For each row the score is ``(price - rolling mean) / rolling std``, where
    both statistics come from the same rolling window of ``w`` rows. The
    window uses pandas' default (non-centred) alignment, so it ends at the
    current row and never includes later rows.

    Side effects: the frame is modified in place. The rolling mean is stored
    in ``data['MA200']``, the rolling standard deviation in ``data['MD200']``
    and the score in ``data['ZNorm']``. The column names are fixed for
    backward compatibility and do not change with ``w``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the price column ``col``.
    w : int, default 200
        Rolling window length in rows.
    col : str, default 'Close'
        Name of the column to normalise.
    min_periods : int, default 0
        Minimum observations required before the window emits a value. With
        the default, early rows are scored against a partial window; pass
        ``min_periods=w`` to leave them as NaN until the window is full.

    Returns
    -------
    pandas.Series
        The ``data['ZNorm']`` column. Rows where the rolling standard
        deviation is undefined or exactly zero are NaN.
    """
    # Build the window once so mean and std are guaranteed to share settings.
    window = data[col].rolling(window=w, min_periods=min_periods)
    data['MA200'] = window.mean()
    data['MD200'] = window.std()

    # A zero std would turn the division into +/-inf; mask it to NaN instead.
    safe_std = data['MD200'].where(data['MD200'] != 0)
    data['ZNorm'] = (data[col] - data['MA200']) / safe_std
    return data['ZNorm']

# Score each ticker with the default window (FMG, BHP, RIO, in that order).
for frame in (df2, df, df3):
    frame['ZNorm'] = z_norm(frame)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [110]:
# Stand-alone z-score series for the BHP / RIO pair.
BHP, RIO = (pd.Series(frame["ZNorm"]) for frame in (df, df3))

# Ratio of BHP close to RIO close, aligned on the date index.
# NOTE(review): despite its name this is a plain price ratio, not a
# cointegration test statistic.
coint = df['Close'].div(df3['Close'])

In [112]:
# Overlay the rolling z-scores of BHP and RIO on a single panel.
# Each trace uses its OWN frame's index for x: plotly pairs x and y by
# position, so reusing df.index for another ticker would put that ticker's
# points on the wrong dates whenever the two trading calendars differ.
fig = make_subplots(rows=1, cols=1, shared_xaxes=True, row_width=[1])
fig.add_trace(go.Scatter(x=df.index, y=df["ZNorm"], marker_color='black', name="BHP", showlegend=True), row=1, col=1)
#fig.add_trace(go.Scatter(x=df2.index, y=df2["ZNorm"], marker_color='blue',name="FMG", showlegend=True), row=1, col=1)
fig.add_trace(go.Scatter(x=df3.index, y=df3["ZNorm"], marker_color='yellow', name="RIO", showlegend=True), row=1, col=1)
# fig.add_trace(go.Scatter(x=coint.index, y=coint, marker_color='green',name="coint", showlegend=True), row=1, col=1)

# Horizontal guide lines at +/- th standard deviations.
th = 2
fig.add_hline(y=th, line_color="green")
fig.add_hline(y=-th, line_color="red")

fig.update_layout(
    title_text="200-day rolling z-score of close price",
    xaxis_title="Date",
    yaxis_title="Z-score",
    xaxis_rangeslider_visible=True,
)

fig.show()

# Take 2