In [None]:
# Sometimes realized volatility is referred to as historical volatility. While implied volatility refers 
# to expected future volatility, realized volatility presents what actually happened in the past.

In [16]:
import numpy as np
import pandas as pd
import yfinance as yf

In [17]:
#download stock data from yahooFinance

stock = 'ROKU'
df = yf.download(stock, start='2020-1-1')

[*********************100%***********************]  1 of 1 completed


In [18]:
#let's see how many rows of data we got

print(f"downloaded {df.shape[0]} rows")

downloaded 747 rows


In [19]:
# check the contents

df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,135.990005,137.330002,133.580002,137.100006,137.100006,9924400
2020-01-03,134.509995,138.889999,134.0,137.509995,137.509995,8637000
2020-01-06,136.259995,144.550003,135.755005,143.369995,143.369995,14422000
2020-01-07,144.009995,144.229996,137.910004,138.190002,138.190002,11574700
2020-01-08,137.110001,137.929993,132.850006,134.639999,134.639999,11291300


In [20]:
# form a new dataframe with adjusted close prices. We prefer adjusted prices sincee they
# reflect the value of the asset more accurately. It factors in the stock splits, dividends, diluted shares, etc.

dfN = pd.DataFrame()
dfN['Close'] = df["Adj Close"].astype(int)
dfN.dropna(axis=0, inplace=True)
dfN.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2020-01-02,137
2020-01-03,137
2020-01-06,143
2020-01-07,138
2020-01-08,134


In [21]:
dfN['simple_r'] = dfN.Close.pct_change() #calculate simple returns just to observe differences
dfN['log_r'] = np.log(dfN.Close/dfN.Close.shift(1)) #calculate log returns
dfN.head()

Unnamed: 0_level_0,Close,simple_r,log_r
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-02,137,,
2020-01-03,137,0.0,0.0
2020-01-06,143,0.043796,0.042864
2020-01-07,138,-0.034965,-0.035591
2020-01-08,134,-0.028986,-0.029414


In [22]:
dfN.drop(["Close","simple_r"],axis=1,inplace=True)
dfN.dropna(axis=0,inplace=True)
dfN.head()

Unnamed: 0_level_0,log_r
Date,Unnamed: 1_level_1
2020-01-03,0.0
2020-01-06,0.042864
2020-01-07,-0.035591
2020-01-08,-0.029414
2020-01-09,-0.007491


In [23]:
# realized volatility function

def realized_volatility(x):
    return np.sqrt(np.sum(x**2))

In [24]:
# Realized volatility is annualized by multiplying daily realized variance 
# with a number of trading days/weeks/ months in a year. The square root of 
# the annualized realized variance is the realized volatility.

# data grouped monthly - we apply realized volatility function to aggregated monthly values

df_rVol = dfN.groupby(pd.Grouper(freq='M')).apply(realized_volatility)
df_rVol.rename(columns={'log_r':'rVol'},inplace=True) #change col.name

In [25]:
#annualize the values in rel_vol column in df_relvol
df_rVol.rVol = df_rVol.rVol * np.sqrt(12)

In [26]:
df_rVol.head()

Unnamed: 0_level_0,rVol
Date,Unnamed: 1_level_1
2020-01-31,0.483056
2020-02-29,0.534913
2020-03-31,1.34396
2020-04-30,0.837913
2020-05-31,0.724887


In [27]:
dfN.head()

Unnamed: 0_level_0,log_r
Date,Unnamed: 1_level_1
2020-01-03,0.0
2020-01-06,0.042864
2020-01-07,-0.035591
2020-01-08,-0.029414
2020-01-09,-0.007491


In [28]:
import cufflinks as cf
cf.go_offline()
dfN.iplot(title='Log Return')
df_rVol.iplot(title='Realized Volatility')
df['Close'].iplot(title=stock+' price')