In [3]:
import os

# The notebook later imports from the `src` package, so the working directory
# must be the folder that contains `src` (presumably the project root, one
# level above this notebook -- confirm against the repository layout).
# The guard keeps the cell idempotent: re-running it, or starting the kernel
# already at the root, no longer climbs an extra directory level.
if not os.path.isdir('src'):
    os.chdir('..')

Refer to the following paper for more information:

["Harvesting the HAR-X Volatility Model"](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4733597)

# Import Libraries

In [4]:
import matplotlib.pyplot as plt
import seaborn as sns

from src.yahoo_finance import *

In [33]:
# Settings forwarded as keyword arguments to VolatilityYF.
params = dict(
    symbol='SPY',
    start_date='2000-01-03',
    end_date='2021-12-28',
    interval='1d',
)

# Build the loader, run its pipeline, and preview the first five rows.
yf = VolatilityYF(**params)
df = yf.pipeline()
df.head()

[*********************100%***********************]  1 of 1 completed


Price,date,Adj Close,Close,High,Low,Open,Volume,log_return
0,2000-01-03,91.887756,145.4375,148.25,143.875,148.25,8164300,
1,2000-01-04,88.294426,139.75,144.0625,139.640625,143.53125,8089800,-0.039891
2,2000-01-05,88.452339,140.0,141.53125,137.25,139.9375,12177900,0.001787
3,2000-01-06,87.0308,137.75,141.5,137.75,139.625,6227200,-0.016202
4,2000-01-07,92.08522,145.75,145.75,140.0625,140.3125,8066500,0.056452


# Volatility Estimates

In [34]:
class LowFrequencyVolatility:
    """Range-based volatility estimators built from open/high/low/close prices.

    Every estimator uses only NumPy ufuncs and plain arithmetic, so it works on
    single prices as well as on the DataFrame columns that ``pipeline`` passes
    in (in which case the result is computed element-wise, one value per row).
    """

    def __init__(self, df: pd.DataFrame):
        # Work on a private copy: pipeline() adds columns and must not modify
        # the frame owned by the caller.
        self.df = df.copy()

    def rogers_satchell_volatility(self, close: float, high: float, low: float, open: float) -> float:
        """Return sqrt(ln(H/O) * ln(H/C) + ln(L/O) * ln(L/C))."""
        high_leg = np.log(high / open) * np.log(high / close)
        low_leg = np.log(low / open) * np.log(low / close)
        return np.sqrt(high_leg + low_leg)

    def garman_klass_volatility(self, close: float, high: float, low: float, open: float) -> float:
        """Return sqrt(0.5 * ln(H/L)^2 - (2 ln 2 - 1) * ln(C/O)^2)."""
        log_range = np.log(high / low)
        log_body = np.log(close / open)
        range_part = 0.5 * log_range ** 2
        body_part = (2 * np.log(2) - 1) * log_body ** 2
        return np.sqrt(range_part - body_part)

    def parkinson_volatility(self, high: float, low: float) -> float:
        """Return sqrt(ln(H/L)^2 / (4 ln 2))."""
        log_range = np.log(high / low)
        return np.sqrt(log_range ** 2 / (4 * np.log(2)))

    def average_volatility(self, close: float, high: float, low: float, open: float) -> float:
        """Return the equally weighted mean of the three estimators above."""
        parkinson = self.parkinson_volatility(high, low)
        garman_klass = self.garman_klass_volatility(close, high, low, open)
        rogers_satchell = self.rogers_satchell_volatility(close, high, low, open)
        return (parkinson + garman_klass + rogers_satchell) / 3

    def pipeline(self) -> pd.DataFrame:
        """Add one column per estimator to the stored frame and return it.

        Reads the 'Close', 'High', 'Low' and 'Open' columns and writes
        'rogers_satchell', 'garman_klass', 'parkinson_volatility' and
        'average_volatility', in that order.
        """
        frame = self.df
        close, high, low, open_ = (frame[name] for name in ('Close', 'High', 'Low', 'Open'))

        frame['rogers_satchell'] = self.rogers_satchell_volatility(close, high, low, open_)
        frame['garman_klass'] = self.garman_klass_volatility(close, high, low, open_)
        frame['parkinson_volatility'] = self.parkinson_volatility(high, low)
        frame['average_volatility'] = self.average_volatility(close, high, low, open_)
        return frame

In [35]:
# Append the range-based volatility estimates to the price frame
# and preview the first five rows.
lfv = LowFrequencyVolatility(df=df)
df = lfv.pipeline()
df.head()

Price,date,Adj Close,Close,High,Low,Open,Volume,log_return,rogers_satchell,garman_klass,parkinson_volatility,average_volatility
0,2000-01-03,91.887756,145.4375,148.25,143.875,148.25,8164300,,0.017988,0.01752,0.01799,0.017832
1,2000-01-04,88.294426,139.75,144.0625,139.640625,143.53125,8089800,-0.039891,0.011567,0.014512,0.018723,0.014934
2,2000-01-05,88.452339,140.0,141.53125,137.25,139.9375,12177900,0.001787,0.022536,0.021718,0.018447,0.020901
3,2000-01-06,87.0308,137.75,141.5,137.75,139.625,6227200,-0.016202,0.018929,0.017032,0.016131,0.017364
4,2000-01-07,92.08522,145.75,145.75,140.0625,140.3125,8066500,0.056452,0.008425,0.015289,0.023905,0.015873


In [36]:
# Select the estimator columns by name instead of by position (`iloc[:, -4:]`),
# so the summary stays correct if further columns are appended to `df` later.
volatility_columns = ['rogers_satchell', 'garman_klass', 'parkinson_volatility', 'average_volatility']
df[volatility_columns].describe().round(3).T.loc[:, ['mean', '50%', 'min', 'max']]

Unnamed: 0_level_0,mean,50%,min,max
Price,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rogers_satchell,0.008,0.006,0.0,0.087
garman_klass,0.008,0.006,0.001,0.082
parkinson_volatility,0.008,0.006,0.001,0.072
average_volatility,0.008,0.006,0.001,0.078


# Exogenous Variables