<a href="https://colab.research.google.com/github/alexrmiller2/misc-projects/blob/main/Welcome_To_Colab_222.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [89]:
import yfinance as yf

# Yahoo Finance symbol -> short prefix used in the flattened column names.
# NOTE(review): 'YMZ25.CBT' looks like a dated futures contract; presumably it
# has to be rolled to the next contract when it expires — TODO confirm.
tickers = {'YMZ25.CBT': 'YMZ', '^VIX': 'VIX', 'DX-Y.NYB': 'DXY'}

# auto_adjust is passed explicitly: yfinance changed its default to True (it
# warns about this when the argument is omitted), so pinning it keeps the
# downloaded prices consistent across yfinance versions.
prices_raw = yf.download(
    list(tickers),
    period='30d',
    interval='15m',
    group_by='ticker',
    auto_adjust=True,
)

# group_by='ticker' yields (ticker, field) column pairs; flatten each pair to
# "<PREFIX>_<Field>", e.g. ('^VIX', 'Close') -> 'VIX_Close'.
prices_raw.columns = [f"{tickers[t]}_{c}" for t, c in prices_raw.columns]

# Sort columns alphabetically (append_features checks this exact order), drop
# the volume columns of the two index series, and express timestamps in
# US/Eastern.
data = (
    prices_raw
    .sort_index(axis=1)
    .drop(columns=['VIX_Volume', 'DXY_Volume'])
    .tz_convert("US/Eastern")
)

data.info()


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  3 of 3 completed

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2623 entries, 2025-09-22 03:15:00-04:00 to 2025-10-31 12:30:00-04:00
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   DXY_Close   2236 non-null   float64
 1   DXY_High    2236 non-null   float64
 2   DXY_Low     2236 non-null   float64
 3   DXY_Open    2236 non-null   float64
 4   VIX_Close   1546 non-null   float64
 5   VIX_High    1546 non-null   float64
 6   VIX_Low     1546 non-null   float64
 7   VIX_Open    1546 non-null   float64
 8   YMZ_Close   2282 non-null   float64
 9   YMZ_High    2282 non-null   float64
 10  YMZ_Low     2282 non-null   float64
 11  YMZ_Open    2282 non-null   float64
 12  YMZ_Volume  2282 non-null   float64
dtypes: float64(13)
memory usage: 286.9 KB





In [1]:
import numpy as np
import pandas as pd

def kalman_fair(
    df: pd.DataFrame,
    dt: float = 1.0,
    q_price: float = 1e-4,
    q_drift: float = 5e-6,
    r_base: float = 5e-3,
    vol_lookback: int = 20,
    vol_alpha: float = 0.5,
    use_adaptive_R: bool = True
) -> pd.DataFrame:
    """Filter ``YMZ_Close`` with a two-state (price, drift) linear Kalman filter.

    The state is ``[price, drift]`` with transition ``price += dt * drift``;
    only the price is observed. The filter is implemented directly in NumPy,
    so the function needs nothing beyond ``numpy`` and ``pandas``.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``YMZ_Close``, ``YMZ_High``, ``YMZ_Low`` and
        ``YMZ_Volume``. It is copied, never modified.
    dt : float
        Time step used in the state transition matrix.
    q_price, q_drift : float
        Diagonal entries of the process-noise covariance ``Q``.
    r_base : float
        Baseline measurement-noise variance ``R``.
    vol_lookback : int
        Rolling window (in rows) for the range and log-volume statistics.
    vol_alpha : float
        Weight of the volume term in the ``R`` multiplier; the bar-range term
        gets ``1 - vol_alpha``.
    use_adaptive_R : bool
        If True, ``R`` for each row is ``r_base`` times a multiplier clipped
        to ``[0.25, 4.0]``; otherwise ``R`` stays at ``r_base``.

    Returns
    -------
    pd.DataFrame
        Indexed like ``df`` with columns ``kalman_price``, ``kalman_drift``,
        ``price_var`` (posterior variance of the price state), ``innovation``
        (observation minus predicted price, NaN where no update was made) and
        ``raw_price`` (the gap-filled close). An empty ``df`` yields an empty
        frame with these columns.
    """
    data = df.copy()

    # Fill gaps in every input column in one pass (forward, then backward so
    # leading gaps are covered too).
    cols = ["YMZ_Close", "YMZ_High", "YMZ_Low", "YMZ_Volume"]
    data[cols] = data[cols].ffill().bfill().astype(float)

    z = data["YMZ_Close"].to_numpy()
    n_obs = len(z)

    # --- Measurement-noise multiplier -------------------------------------
    # Range term: current bar range relative to its rolling mean.
    tr = (data["YMZ_High"] - data["YMZ_Low"]).abs()
    tr_ma = tr.rolling(vol_lookback, min_periods=1).mean()

    # Volume term: rolling z-score of log-volume. Zero volume is treated as
    # missing so the log stays finite.
    log_vol = np.log(data["YMZ_Volume"].replace(0, np.nan).ffill())
    vol_mean = log_vol.rolling(vol_lookback, min_periods=1).mean()
    vol_std = log_vol.rolling(vol_lookback, min_periods=1).std(ddof=0) + 1e-12
    vol_z = (log_vol - vol_mean) / vol_std

    r_scale = (1 - vol_alpha) * (tr / (tr_ma + 1e-12)) \
              + vol_alpha * (np.exp(np.clip(vol_z, -2, 2)) / np.e + 0.5)
    # Rows where the multiplier is undefined (e.g. zero volume at the very
    # start of the series, which ffill cannot repair) fall back to a neutral
    # 1.0. A NaN here would make R NaN and turn the whole filter state NaN
    # from that row onward.
    r_scale = r_scale.clip(0.25, 4.0).fillna(1.0).to_numpy()

    # --- Kalman filter setup ----------------------------------------------
    F = np.array([[1.0, dt], [0.0, 1.0]])   # state transition
    H = np.array([[1.0, 0.0]])              # only the price is observed
    Q = np.diag([q_price, q_drift])         # process noise
    P = np.diag([1e3, 1e3])                 # large initial uncertainty
    eye2 = np.eye(2)
    # Start at the first observed price with zero drift; the placeholder for
    # empty input is never used because the loop below does not run.
    x = np.array([[z[0]], [0.0]]) if n_obs else np.zeros((2, 1))

    x_filt = np.zeros((n_obs, 2))
    p_var = np.zeros(n_obs)
    innov = np.full(n_obs, np.nan)

    for i, zi in enumerate(z):
        # Predict.
        x = F @ x
        P = F @ P @ F.T + Q

        r_meas = r_base * r_scale[i] if use_adaptive_R else r_base

        # Update only when an observation exists; otherwise keep the
        # prediction and leave the innovation as NaN.
        if not np.isnan(zi):
            resid = zi - (H @ x).item()
            innov[i] = resid
            PHt = P @ H.T                       # shape (2, 1)
            S = (H @ PHt).item() + r_meas       # innovation variance
            K = PHt / S                         # Kalman gain, shape (2, 1)
            x = x + K * resid
            # Joseph-form covariance update.
            I_KH = eye2 - K @ H
            P = I_KH @ P @ I_KH.T + (K * r_meas) @ K.T

        x_filt[i] = x.ravel()
        p_var[i] = P[0, 0]

    return pd.DataFrame({
        "kalman_price": x_filt[:, 0],
        "kalman_drift": x_filt[:, 1],
        "price_var": p_var,
        "innovation": innov,
        "raw_price": z,
    }, index=data.index)


def append_features(data):
    """Return a copy of ``data`` with ``Kalman`` and ``SMA_20`` columns added.

    The input must have exactly the thirteen columns listed below, in that
    order. If it does not, a diagnostic is printed and ``data`` itself is
    returned untouched (no copy, no new columns).

    Added columns:
      * ``Kalman``  - ``kalman_price`` from ``kalman_fair`` with
        ``q_price=1e-6, q_drift=5e-6, r_base=5e-1``.
      * ``SMA_20``  - mean of ``YMZ_Close`` over a trailing ``"5D"`` time
        window. NOTE(review): the name suggests 20 periods, but the window
        is five calendar days — confirm which is intended.
    """
    expected_columns = [
        'DXY_Close', 'DXY_High', 'DXY_Low', 'DXY_Open',
        'VIX_Close', 'VIX_High', 'VIX_Low', 'VIX_Open',
        'YMZ_Close', 'YMZ_High', 'YMZ_Low', 'YMZ_Open',
        'YMZ_Volume',
    ]
    received_columns = list(data.columns)

    if received_columns == expected_columns:
        enriched = data.copy()
        fair_value = kalman_fair(enriched, q_price=1e-6, q_drift=5e-6, r_base=5e-1)
        enriched['Kalman'] = fair_value['kalman_price']
        # Time-based window, so the index must be datetime-like.
        close_window = enriched['YMZ_Close'].rolling("5D")
        enriched["SMA_20"] = close_window.mean()
        return enriched

    # Layout differs (names or order): report and hand the input back as-is.
    print("Error: Column mismatch.")
    print(f"Received columns: {received_columns}")
    print(f"Expected columns: {expected_columns}")
    return data

import plotly.graph_objects as go

# NOTE: `data` must already exist in the kernel; it is created by the download
# cell, so run that cell first (running this one on a fresh kernel raises
# NameError).
# Forward-fill after adding features so remaining gaps do not break the lines.
df = append_features(data).ffill()

fig = go.Figure()
series_to_plot = {"Close": df.YMZ_Close, "Kalman": df.Kalman, "SMA": df.SMA_20}
for name, yvals in series_to_plot.items():
    fig.add_trace(go.Scatter(x=df.index, y=yvals, name=name))

# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title="YMZ close vs. Kalman estimate and SMA",
    xaxis_title="Time",
    yaxis_title="Price",
)
fig.show()

NameError: name 'data' is not defined

In [50]:
!pip install filterpy

Collecting filterpy
  Downloading filterpy-1.4.5.zip (177 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 178.0/178.0 kB 10.2 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: filterpy
  Building wheel for filterpy (setup.py) ... done
  Created wheel for filterpy: filename=filterpy-1.4.5-py3-none-any.whl size=110460 sha256=b8ae3f93af767f74366155f572f02ddedc535fe1dbfe30e2e6963aa7945b5dcb
  Stored in directory: /root/.cache/pip/wheels/77/bf/4c/b0c3f4798a0166668752312a67118b27a3cd341e13ac0ae6ee
Successfully built filterpy
Installing collected packages: filterpy
Successfully installed filterpy-1.4.5
