In [1]:
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
import matplotlib.pyplot as plt

In [None]:
# class SNP500():
#     def __init__(self):
#         self.retrieve()
    
#     def retrieve(self, ticker='^GSPC', intv='1m'):
#         self.df = yf.download(ticker, interval=intv)

#     def bipower_variance(self, r: list[float], K=390) -> float:
#         a = np.abs(r[::-1][: K])
#         b = np.abs(r[::-1][1: K+1])
        
#         return np.pi / (2 * K) * np.sum(a * b)

#     def local_volatility(self, r: list[float], K=390) -> float:
#         return np.sqrt(self.bipower_variance(r, K))

Loading S&P 500 data from *start_date* to *end_date* with interval *intv*. The ticker for the S&P 500 is ^GSPC.

In [None]:
# Download settings for the S&P 500 index (ticker ^GSPC) at 1-minute bars.
# NOTE(review): start_date / end_date are defined here but are NOT passed to
# yf.download below, so no explicit date range is requested. Yahoo reportedly
# serves 1-minute bars only for a short recent window, so wiring a 2015-2020
# range in together with intv='1m' may return nothing -- confirm the intent.
start_date, end_date = '2015-01-01', '2020-01-01'
ticker, intv = '^GSPC', '1m'

market_data = yf.download(ticker, interval=intv)

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


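The 1-min log return is defined as $r_t = \log(p_t / p_{t-1})$, where $p_t$ is the price at minute $t$ and $\log$ is the natural logarithm. In the cell below, each bar's open and close prices stand in for $p_{t-1}$ and $p_t$.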

In [3]:
# Per-bar log return: natural log of each bar's close price over its open price.
market_data['Return'] = market_data['Close'].div(market_data['Open']).pipe(np.log)

The rolling volatility is estimated using

$$
\sigma_t^2 = \frac{\pi}{2K} \sum_{i=1}^K | r_{t-i} | | r_{t-i+1} |
$$

where $K=390$. Note that one trading day contains 390 one-minute bars, and hence the above volatility estimate is based on one day of S&P 500 data. For the definition and explanation, see https://arxiv.org/pdf/2106.07040 and references therein.

In [34]:
def bipower_variance(r: list[float], K=390) -> float:
    """Estimate local variance from a return series by bipower variation.

    Evaluates ``pi / (2 * K) * sum_{i=1..K} |r[t-i]| * |r[t-i+1]|`` where
    ``t`` is the position of the last element of ``r``.

    Parameters
    ----------
    r : 1-D sequence of returns ordered oldest to newest. A list, a NumPy
        array or a pandas Series are all accepted; only the values are used.
        Only the last ``K + 1`` entries contribute.
    K : number of adjacent-return pairs in the sum (must be >= 1).

    Returns
    -------
    float
        The bipower variance. If ``r`` holds fewer than ``K + 1`` values the
        sum runs over the pairs that exist (still scaled by ``pi / (2 * K)``);
        an input with fewer than two values yields ``0.0``.

    Raises
    ------
    ValueError
        If ``K`` is smaller than 1.
    """
    if K < 1:
        raise ValueError("K must be a positive integer")

    # Work on a plain ndarray. Multiplying two pandas Series aligns them on
    # their index labels, which would pair every return with itself (giving a
    # sum of squares) instead of with its neighbour.
    abs_r = np.abs(np.asarray(r, dtype=float)[-(K + 1):])

    # K adjacent pairs need K + 1 observations; slicing the same array with a
    # one-step offset keeps both factors the same length for any input size.
    return float(np.pi / (2 * K) * np.sum(abs_r[1:] * abs_r[:-1]))

def local_volatility(r: list[float], K=390) -> float:
    """Return the local volatility of a return series.

    This is the square root of ``bipower_variance(r, K)``; ``r`` and ``K``
    are forwarded to that function unchanged.
    """
    variance = bipower_variance(r, K)
    return np.sqrt(variance)

In [43]:
# Number of adjacent-return pairs per estimate (one trading day of 1-min bars).
MINUTES_PER_DAY = 390

# K adjacent pairs need K + 1 returns, so the window is one longer than K.
# raw=True hands each window to the function as a NumPy array; with the
# default (a pandas Series) the element-wise product inside bipower_variance
# would be aligned on the index instead of pairing neighbouring returns.
rolling_variance = (
    market_data['Return']
    .rolling(MINUTES_PER_DAY + 1)
    .apply(bipower_variance, raw=True, kwargs={'K': MINUTES_PER_DAY})
)

market_data['Variance'] = rolling_variance
# Volatility is the square root of the variance; reuse it rather than running
# a second, equally expensive rolling apply.
market_data['Volatility'] = np.sqrt(rolling_variance)
# Absolute return in units of local volatility (inf/NaN where volatility is 0
# or not yet available for the first MINUTES_PER_DAY rows).
market_data['Jump'] = np.abs(market_data['Return'] / market_data['Volatility'])

So far, the periodicity factor is not accounted for.