In [65]:
import datetime
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import polars as pl

In [67]:
# Template for the Yahoo Finance CSV download endpoint; `symbol` must already be
# URL-encoded (e.g. "%5EVIX" for "^VIX").
YAHOO_HISTORY_URL = (
    "https://query1.finance.yahoo.com/v7/finance/download/{symbol}"
    "?period1={start}&period2={end}&interval=1d&events=history&includeAdjustedClose=true"
)


def end_of_previous_day(day: datetime.date) -> int:
    """Return the Unix timestamp of 23:59:59 on the calendar day before ``day``.

    The datetime is naive, so ``timestamp()`` interprets it in the machine's
    local time zone.  Date arithmetic is used instead of ``day.day - 1`` so the
    result is valid on the first day of a month (and on 1 January), where
    ``day.day - 1`` would be 0 and ``datetime.datetime(...)`` would raise
    ``ValueError``.
    """
    previous_day = day - datetime.timedelta(days=1)
    last_second = datetime.datetime.combine(previous_day, datetime.time(23, 59, 59))
    return int(last_second.timestamp())


# Query window: 1990-01-01 (local midnight) up to the last second of yesterday.
start_date = int(datetime.datetime(1990, 1, 1).timestamp())
today = datetime.date.today()
end_date = end_of_previous_day(today)

vix_url = YAHOO_HISTORY_URL.format(symbol="%5EVIX", start=start_date, end=end_date)
spx_url = YAHOO_HISTORY_URL.format(symbol="%5ESPX", start=start_date, end=end_date)

In [68]:
def _parse_price_csv(raw_csv: bytes) -> pl.DataFrame:
    """Parse a downloaded price-history CSV payload into a polars DataFrame.

    Seven column dtypes are supplied as a list: one Date, five Float64 and one
    Int64.  Presumably these line up with Yahoo's Date / Open / High / Low /
    Close / Adj Close / Volume columns -- TODO confirm against the live payload.
    ``ignore_errors=True`` asks polars to tolerate values it cannot parse
    instead of raising.
    """
    column_dtypes = [pl.Date] + [pl.Float64] * 5 + [pl.Int64]
    return pl.read_csv(raw_csv, dtypes=column_dtypes, ignore_errors=True)


# Download the raw CSV bytes for each index, then parse them with the shared helper.
with urllib.request.urlopen(spx_url) as response:
    spx_html = response.read()
spx_data = _parse_price_csv(spx_html)

with urllib.request.urlopen(vix_url) as response:
    vix_html = response.read()
vix_data = _parse_price_csv(vix_html)

In [91]:
def yang_zhang_vol(ohlc: pl.Expr, N: int) -> pl.Expr:
    """Build a rolling range-based volatility expression from OHLC columns.

    For each row the expression computes
    ``ln(H/C) * ln(H/O) + ln(L/C) * ln(L/O)``, sums it over a rolling window of
    ``N`` rows, multiplies by ``1 / N`` and takes the square root.

    NOTE(review): despite the function name, this formula appears to be the
    Rogers-Satchell estimator; a full Yang-Zhang estimator would also include
    overnight and open-to-close variance terms -- confirm which is intended.

    Parameters
    ----------
    ohlc
        Accepted for interface compatibility but not used; the columns
        "Open", "High", "Low" and "Close" are read by name from the frame the
        expression is evaluated in.
    N
        Rolling window length in rows.  ``N == 0`` raises ``ZeroDivisionError``.

    Returns
    -------
    pl.Expr
        Un-annualised volatility per row (null until the window is filled).
    """
    log_open, log_high, log_low, log_close = (
        pl.col(name).log() for name in ("Open", "High", "Low", "Close")
    )
    high_term = (log_high - log_close) * (log_high - log_open)
    low_term = (log_low - log_close) * (log_low - log_open)
    window_sum = (high_term + low_term).rolling_sum(N)
    return ((1 / N) * window_sum).sqrt()

In [116]:
# Window lengths are in rows (trading days); tune them here rather than inline.
REAL_VOL_WINDOW = 21  # look-back for SPX realised volatility
FORWARD_HORIZON = 21  # number of future days summed into the forward returns
VIX_STD_WINDOW = 21  # look-back for the rolling std that scales the VIX return
IV_RV_MEAN_WINDOW = 42  # look-back for the rolling mean of the IV - RV spread
TRADING_DAYS_PER_YEAR = 252  # annualisation factor for daily volatility

spx = spx_data.select(
    [
        pl.col("Date"),
        pl.col("Close").alias("spx"),
        # Shift 1 period forward so the value on a day doesn't use that day's
        # information, then annualise and express in percent.  The alias is
        # applied last so the output column name is unambiguous.
        (
            yang_zhang_vol(pl.col(["Open", "High", "Low", "Close"]), REAL_VOL_WINDOW).shift(1)
            * np.sqrt(TRADING_DAYS_PER_YEAR)
            * 100
        ).alias("spx_real_vol"),
    ]
)

vix = vix_data.select(
    [
        pl.col("Date"),
        pl.col("Close").alias("vix"),
    ]
)

data = (
    # Keep only dates present in both series.
    spx.join(vix, on="Date", how="inner")
    # Daily log return
    .with_columns(
        [
            pl.col("spx").log().diff().alias("spx_ret"),
            pl.col("vix").log().diff().alias("vix_ret"),
        ]
    )
    # Return over the NEXT FORWARD_HORIZON days: the rolling sum ending at row
    # t + FORWARD_HORIZON is pulled back to row t, so it covers rows t+1 .. t+FORWARD_HORIZON.
    .with_columns(
        [
            pl.col("spx_ret")
            .rolling_sum(FORWARD_HORIZON)
            .shift(-FORWARD_HORIZON)
            .alias("spx_for_ret"),
            pl.col("vix_ret")
            .rolling_sum(FORWARD_HORIZON)
            .shift(-FORWARD_HORIZON)
            .alias("vix_for_ret"),
        ]
    )
    .with_columns(
        [
            # VIX return in standard deviation terms; the std is lagged one row
            # so today's return is not part of its own scale.
            (
                pl.col("vix_ret") / pl.col("vix_ret").rolling_std(VIX_STD_WINDOW).shift(1)
            ).alias("vix_ret_std"),
            # Spread between the VIX level and lagged SPX realised volatility.
            (pl.col("vix") - pl.col("spx_real_vol")).alias("iv_rv"),
        ]
    )
    # Despite the "_avg" suffix this is the spread minus its rolling mean,
    # i.e. the deviation of iv_rv from its recent average.
    .with_columns(
        (pl.col("iv_rv") - pl.col("iv_rv").rolling_mean(IV_RV_MEAN_WINDOW)).alias("iv_rv_avg")
    )
    # Drop rows with any null (rolling-window warm-up and the forward-return tail).
    .drop_nulls()
)

In [105]:
# Feature frame (x) and target series (y): the forward VIX log return.
feature_columns = ["vix_ret_std", "iv_rv", "iv_rv_avg"]
x = data.select(feature_columns)
y = data.get_column("vix_for_ret")

In [120]:
# Show the feature frame as this cell's output.
x

vix_ret_std,iv_rv,iv_rv_avg
f64,f64,f64
1.100416,10.870309,2.382847
2.336899,14.001319,5.383701
-1.573858,11.109525,2.426414
1.006532,12.82327,4.021164
-0.507814,11.842325,2.956413
0.349375,12.337812,3.361636
0.335096,12.755534,3.66977
-0.379408,12.303149,3.089353
-2.747799,8.54715,-0.702761
1.297306,10.653704,1.335399
