<a href="https://colab.research.google.com/github/howardya/ml-finance/blob/main/volatility/volatility_forecast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Summary

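US total equity market (Wilshire 5000) volatility forecasts: RMSE against the forward 2-month (8-week) realized volatility, computed on weekly data over the validation period from 2020 onward.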

| Method | Weekly (2M) |
| --- | --- |
| 2M Vol | 0.0286387|
| 1M Vol | 0.0277460|
|4M Vol|0.0296685|
| 2M Halflife | 0.0274711|
| 1M Halflife | 0.0272838|
|4M Halflife|0.0273250|



# Volatility Forecast

In [1]:
import pandas as pd
import pandas_datareader as pdr
import pandas_datareader.data as web
import datetime
import numpy as np

Print the version of each loaded package.

In [2]:
# Aliases of the imported packages whose versions are reported below.
loaded_packages = ['pd', 'pdr', 'np']
for pkg in loaded_packages:
  # Look the module up by its alias in the notebook namespace; this avoids
  # building and exec()-ing a code string just to read an attribute.
  pkg_version = globals()[pkg].__version__
  print(f'{pkg} version: {pkg_version}')

pd version: 1.1.5
pdr version: 0.9.0
np version: 1.19.5


# Get Data

In [3]:
# Date bounds handed to the data reader as `start` and `end`.
start_date = '2000-01-01'
end_date = '2021-10-01'

# FRED series to download; the commented-out entries are currently disabled.
fred_tickers = [
  'WILL5000IND', # Wilshire 5000 Total Market Index, https://fred.stlouisfed.org/series/WILL5000IND
  # 'NASDAQ100', # https://fred.stlouisfed.org/series/NASDAQ100
  # 'SP500', # https://fred.stlouisfed.org/series/SP500
  # 'DGS10', # https://fred.stlouisfed.org/series/DGS10
  # 'DGS2', # https://fred.stlouisfed.org/series/DGS2
]

# Fetch the series from the 'fred' data source.
data = web.DataReader(
  name=fred_tickers,
  data_source='fred',
  start=start_date,
  end=end_date,
)

In [4]:
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appends a stray "None" line to the output.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5675 entries, 2000-01-03 to 2021-10-01
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   WILL5000IND  5474 non-null   float64
dtypes: float64(1)
memory usage: 88.7 KB
None


In [5]:
# Keep the last available observation of each week (weekly bins are labelled on Sunday).
weekly_data = data.resample('W').last()

# Week-over-week simple returns, with the columns tagged '_Ret' ...
weekly_simple_returns = weekly_data.pct_change().add_suffix('_Ret')
# ... converted to log returns via log(1 + r); rows with missing values are dropped.
weekly_data_returns = np.log1p(weekly_simple_returns).dropna()

Metric: root-mean-square error (RMSE) between the forecast and the realized volatility.

In [6]:
def calculate_error_rmse(pd1, pd2):
  """Return the root-mean-square error between two single-series DataFrames.

  Only the first column of each frame is used; any further columns are ignored.
  The two columns are subtracted as pandas Series, so values are matched on
  their index labels rather than on position.

  Args:
    pd1: DataFrame whose first column holds one series (e.g. the forecast).
    pd2: DataFrame whose first column holds the other series (e.g. the target).

  Returns:
    The square root of the mean squared difference of the two first columns.
  """
  first_column_diff = pd1.iloc[:, 0].sub(pd2.iloc[:, 0])
  mean_squared_error = (first_column_diff ** 2).mean()
  return np.sqrt(mean_squared_error)


## Calculate Realized Volatility

Defined as $\sqrt{\frac{1}{N}\sum_{i=1}^{N} x_i^2}$, where the $x_i$ are the next $N$ weekly log returns and $N$ is `forward_window`.

In [7]:
# Calculate the forward realized volatility: for each week, the root-mean-square
# of the following `forward_window` weekly log returns.
forward_window = 8 # in weeks

# Reuse weekly_data_returns instead of recomputing the log returns from weekly_data.
realized_volatility = (
  weekly_data_returns
  .pow(2)
  .rolling(window=forward_window).mean() # trailing mean of squared returns ending at each week
  .shift(-forward_window)                # move it back so each week holds the mean over the weeks after it
  .pow(0.5)
  .add_suffix('_Realized_Vol')
  .dropna()                              # drop weeks whose forward window is incomplete
)

# Restrict the evaluation sample to 2020 onward.
realized_volatility = realized_volatility.loc['2020':]

# Dates on which every forecast below is scored.
validation_dates = realized_volatility.index

## Method 1: Rolling Vol

In [11]:
def rolling_vol_forecast(returns, window):
  """Root-mean-square of the trailing `window` rows of `returns`.

  Args:
    returns: DataFrame of periodic returns.
    window: Number of trailing rows to average over.

  Returns:
    DataFrame shaped like `returns`, holding the square root of the rolling
    mean of squared returns.
  """
  return returns.pow(2).rolling(window=window).mean().pow(0.5)


# Look-back lengths to compare, relative to the forecast horizon.
# The order is kept so the printout (and the final `simple_vol`) is unchanged.
rolling_windows = {
  '2M': forward_window,
  '1M': int(0.5*forward_window),
  '4M': 2*forward_window,
}

for label, window in rolling_windows.items():
  # Score each forecast on the validation dates only.
  simple_vol = rolling_vol_forecast(weekly_data_returns, window).loc[validation_dates]
  print(f'{label} Rolling: {calculate_error_rmse(simple_vol, realized_volatility)}')

2M Rolling: 0.028638700230523076
1M Rolling: 0.0277459860661588
4M Rolling: 0.02966853807756444


## Method 2: Halflife

In [13]:
def ewm_vol_forecast(returns, halflife):
  """Square root of the exponentially weighted mean of squared `returns`.

  Args:
    returns: DataFrame of periodic returns.
    halflife: Half-life of the exponential weights, in rows.

  Returns:
    DataFrame shaped like `returns`, holding the exponentially weighted
    volatility estimate at each row.
  """
  return returns.pow(2).ewm(halflife=halflife).mean().pow(0.5)


# Half-lives to compare, relative to the forecast horizon.
# The order is kept so the printout (and the final `predicted_vol`) is unchanged.
ewm_halflives = {
  '2M': forward_window,
  '1M': int(0.5*forward_window),
  '4M': 2*forward_window,
}

for label, halflife in ewm_halflives.items():
  # Score each forecast on the validation dates only.
  predicted_vol = ewm_vol_forecast(weekly_data_returns, halflife).loc[validation_dates]
  print(f'{label} Halflife: {calculate_error_rmse(predicted_vol, realized_volatility)}')

2M Halflife: 0.027471132223528552
1M Halflife: 0.027283839126274276
4M Halflife: 0.027324973671621823


In [18]:
# Show the realized-volatility target on the validation dates (all columns).
realized_volatility.loc[validation_dates, :]

Unnamed: 0_level_0,WILL5000IND_Ret_Realized_Vol
DATE,Unnamed: 1_level_1
2020-01-05,0.046827
2020-01-12,0.046731
2020-01-19,0.058426
2020-01-26,0.082801
2020-02-02,0.089962
...,...
2021-07-11,0.012605
2021-07-18,0.013296
2021-07-25,0.011085
2021-08-01,0.011152
