In [3]:
import pandas as pd
from pandas.tseries.offsets import BDay
import numpy as np
import matplotlib.pyplot as plt
import psycopg2 as pg
from scipy import stats
from cmdstanpy import CmdStanModel
import arviz as az

from config import DATABASE_URI

plt.rcParams["figure.figsize"] = (15,10)

In [4]:
# Load the minute-frequency closes for ^GSPC from the prices table.
# psycopg2's `with connection:` block only scopes a transaction (commit/rollback on
# exit); it does NOT close the connection, so the connection is closed explicitly.
conn = pg.connect(DATABASE_URI)
try:
    with conn.cursor() as cur:
        # ORDER BY makes the row order deterministic: the intraday returns computed
        # from this series are differences between consecutive rows.
        cur.execute("SELECT date, close FROM prices WHERE frequency='MINUTE' AND ticker='^GSPC' ORDER BY date")
        result = cur.fetchall()
finally:
    conn.close()

# One-column frame indexed by "date", collapsed to a Series of closes. Squeezing only
# the column axis keeps a Series even if the query returns a single row.
data = pd.DataFrame.from_records(result, columns=["date", "close"], coerce_float=True, index="date").squeeze(axis="columns")

# Per-day realized measures built from intraday log returns, keyed by calendar date.
results_RV = {}
results_BV = {}
results_QV = {}
# Scaling constants for products of absolute returns: mu_1 = E|Z| = sqrt(2/pi) for
# standard normal Z, so mu_1**-2 = pi/2 (bipower) and mu_1**-4 = pi**2/4 (quadpower).
bipower_scale = np.pi / 2
quadpower_scale = np.square(np.pi) / 4
for idx, day in data.groupby(data.index.date):
    # Log returns within the day only; the first observation has no predecessor and
    # is NaN, which the pandas-backed sums below skip.
    returns = np.log(day) - np.log(day.shift(1))
    abs_returns = np.abs(returns)
    # Realized variance: sum of squared intraday returns.
    results_RV[idx] = np.sum(np.square(returns))
    # Bipower variation: scaled sum of products of adjacent absolute returns.
    results_BV[idx] = bipower_scale * np.sum(abs_returns * abs_returns.shift(1))
    # Quadpower quarticity: scaled sum of products of four adjacent absolute returns.
    # The mu_1**-4 factor was previously missing, which left QV / BV**2 understated
    # by 4/pi**2 relative to the pi/2-scaled bipower variation.
    # NOTE(review): 390 is presumably the number of one-minute intervals in a regular
    # session; days with fewer bars are scaled by the same constant - confirm.
    results_QV[idx] = 390 * quadpower_scale * np.sum(
        abs_returns * abs_returns.shift(1) * abs_returns.shift(2) * abs_returns.shift(3)
    )

def _to_daily_series(values_by_day):
    """Turn a {date: value} mapping into a Series with a DatetimeIndex.

    The converted index is assigned directly. Reindexing with the converted labels
    instead relies on pandas implicitly matching `datetime.date` labels against
    `Timestamp` labels, and silently produces an all-NaN Series if that match fails.
    """
    series = pd.Series(values_by_day)
    series.index = pd.to_datetime(series.index)
    return series


# Realized variance
RV = _to_daily_series(results_RV)
# Bipower variation
BV = _to_daily_series(results_BV)
# Quadpower variation
QV = _to_daily_series(results_QV)

# Jump test statistic (ratio form): sqrt(M) * (BV/RV - 1) / sqrt(theta * max(1, QV/BV^2)).
# It is negative when RV exceeds BV, so jumps show up in the lower tail.
theta = (np.square(np.pi)/4) + np.pi - 5
# NOTE(review): 390 is presumably the number of intraday intervals per session, the
# same constant used to scale the quadpower term - confirm.
n_intraday = 390
# One-sided significance level of the jump test.
jump_alpha = 0.01
J = (np.sqrt(n_intraday) / np.sqrt(theta * np.maximum(1, QV / np.square(BV)))) * ((BV/RV) - 1)
# Boolean Series of jump presence: True where J falls at or below the lower-tail
# normal quantile. Days where J is NaN compare False and count as non-jump days.
jumps = J <= stats.norm.ppf(jump_alpha)
# Magnitude of jumps on jump days = RV - BV (zero on non-jump days)
jumps_mag = ((RV - BV) * jumps)
# Continuous component of realized variance. On non-jump days it's equal to realized variance,
# on jump days it's equal to the bipower variation
RV_cont = ~jumps * RV + jumps * BV

In [24]:
# Integer positions (0-based, counted over the days in `jumps`) of the days where a jump occurred
jump_indexes = np.flatnonzero(jumps.to_numpy())
# Number of days (in sample positions) separating each jump from the previous one
jump_intervals = np.diff(jump_indexes)