In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from typing import Dict, Tuple, Optional

def best_level_ofi(df: pd.DataFrame) -> pd.Series:
    """
    Best-level (top-of-book) order flow imbalance.

    Reads the columns 'bid_px_00', 'bid_sz_00', 'ask_px_00' and 'ask_sz_00'
    and compares every row with the row before it:

    * bid flow: +size if the bid price rose, the size change if the price is
      unchanged, -size otherwise;
    * ask flow: -size if the ask price rose, the size change if the price is
      unchanged, +size otherwise.

    The OFI is bid flow minus ask flow.

    Parameters
    ----------
    df : DataFrame holding the four best-level columns listed above.

    Returns
    -------
    Series named 'ofi_best', indexed like ``df``. The first row is NaN
    because it has no predecessor. An empty input yields an empty Series.

    Raises
    ------
    KeyError if one of the required columns is missing.
    """
    bid_px, bid_sz = df['bid_px_00'], df['bid_sz_00']
    ask_px, ask_sz = df['ask_px_00'], df['ask_sz_00']

    prev_bid_px, prev_bid_sz = bid_px.shift(1), bid_sz.shift(1)
    prev_ask_px, prev_ask_sz = ask_px.shift(1), ask_sz.shift(1)

    # Comparisons against NaN are False, so rows without a valid previous
    # price fall through to the final ("price moved down") branch.
    bid_flow = np.where(
        bid_px > prev_bid_px,
        bid_sz,
        np.where(bid_px == prev_bid_px, bid_sz - prev_bid_sz, -bid_sz),
    )
    ask_flow = np.where(
        ask_px > prev_ask_px,
        -ask_sz,
        np.where(ask_px == prev_ask_px, ask_sz - prev_ask_sz, ask_sz),
    )

    ofi = pd.Series(bid_flow - ask_flow, index=df.index, name='ofi_best')
    # The first row has no predecessor, so its value is meaningless; blank it.
    # Guarded so that an empty frame no longer raises IndexError.
    if len(ofi) > 0:
        ofi.iloc[0] = np.nan
    return ofi

def _level_order_flow(df: pd.DataFrame, lvl: str) -> np.ndarray:
    """ Raw (un-normalized) OFI of one book level; ``lvl`` is the two-digit column suffix, e.g. '03'. """
    bid_px, bid_sz = df[f'bid_px_{lvl}'], df[f'bid_sz_{lvl}']
    ask_px, ask_sz = df[f'ask_px_{lvl}'], df[f'ask_sz_{lvl}']

    prev_bid_px, prev_bid_sz = bid_px.shift(1), bid_sz.shift(1)
    prev_ask_px, prev_ask_sz = ask_px.shift(1), ask_sz.shift(1)

    # Comparisons against NaN are False, so rows without a valid previous
    # price fall through to the final ("price moved down") branch.
    bid_flow = np.where(
        bid_px > prev_bid_px,
        bid_sz,
        np.where(bid_px == prev_bid_px, bid_sz - prev_bid_sz, -bid_sz),
    )
    ask_flow = np.where(
        ask_px > prev_ask_px,
        -ask_sz,
        np.where(ask_px == prev_ask_px, ask_sz - prev_ask_sz, ask_sz),
    )
    return bid_flow - ask_flow


def multi_level_ofi(df: pd.DataFrame, levels: int = 10) -> pd.DataFrame:
    """
    Multi-level OFI, one column per book level, normalized by average depth.

    For every level d in ``range(levels)`` the raw order flow imbalance is
    computed from the columns 'bid_px_dd', 'bid_sz_dd', 'ask_px_dd' and
    'ask_sz_dd' (dd = two-digit level) and divided by Q, the row-wise mean
    over all levels of (bid size + ask size) / 2. A small epsilon is added
    to Q so that an empty book does not divide by zero.

    Parameters
    ----------
    df : DataFrame with the four columns above for every requested level.
    levels : number of book levels to use, starting at level 0 (must be >= 1).

    Returns
    -------
    DataFrame indexed like ``df`` with columns 'ofi_level_0' ...
    'ofi_level_{levels-1}'. The first row is NaN because it has no
    predecessor. An empty input yields an empty frame with those columns.

    Raises
    ------
    ValueError if ``levels`` is smaller than 1.
    KeyError if a required column is missing.
    """
    if levels < 1:
        raise ValueError(f"levels must be at least 1; received {levels}.")

    suffixes = [f"{d:02d}" for d in range(levels)]

    # Q: per-row average depth across all requested levels.
    depth_per_level = [(df[f'bid_sz_{s}'] + df[f'ask_sz_{s}']) / 2 for s in suffixes]
    scale = (pd.concat(depth_per_level, axis=1).mean(axis=1) + 1e-10).to_numpy()

    ofi_levels = {}
    for d, suffix in enumerate(suffixes):
        col = f'ofi_level_{d}'
        ofi_levels[col] = pd.Series(
            _level_order_flow(df, suffix) / scale, index=df.index, name=col
        )

    result = pd.DataFrame(ofi_levels)
    # The first row has no predecessor; blank it once for every level.
    # Guarded so that an empty frame no longer raises IndexError.
    if len(result) > 0:
        result.iloc[0] = np.nan
    return result

def integrated_ofi(ofi_df: pd.DataFrame) -> pd.Series:
    """
    Integrated OFI: projection of the level OFIs onto their first principal component.

    Missing values are replaced by zero, every column is standardized
    (zero mean, unit sample standard deviation, with a small epsilon guarding
    constant columns), and a one-component PCA is fitted. The standardized
    data is projected onto the first component and divided by the L1 norm of
    the component weights.

    The sign of a principal component is arbitrary, so the component is
    oriented to make its weights sum to a non-negative value; the result then
    moves in the same direction as the bulk of the level OFIs rather than
    possibly being mirrored.

    Parameters
    ----------
    ofi_df : DataFrame of per-level OFI columns (rows are observations).

    Returns
    -------
    Series named 'ofi_integrated', indexed like ``ofi_df``. Rows in which
    every input column is NaN stay NaN instead of receiving a value
    derived purely from the zero fill.
    """
    # Rows with no information at all (e.g. a NaN warm-up row).
    no_data = ofi_df.isna().all(axis=1).to_numpy()

    ofi_filled = ofi_df.fillna(0)
    ofi_std = (ofi_filled - ofi_filled.mean()) / (ofi_filled.std() + 1e-10)

    pca = PCA(n_components=1)
    pca.fit(ofi_std)
    w1 = pca.components_[0]
    # Fix the arbitrary PCA sign so the loadings are predominantly positive.
    if w1.sum() < 0:
        w1 = -w1

    integrated = ofi_std.values.dot(w1) / np.sum(np.abs(w1))
    # Do not fabricate a value where every input was missing.
    integrated = np.where(no_data, np.nan, integrated)
    return pd.Series(integrated, index=ofi_df.index, name='ofi_integrated')

def cross_asset_ofi(ofi_dict: Dict[str, pd.Series],
                    weights: Optional[Dict[str, float]] = None) -> pd.Series:
    """
    Cross-Asset OFI: aggregate multiple single-asset OFI series into one.

    Parameters
    ----------
    ofi_dict : mapping from instrument name to its OFI Series. The series are
        concatenated column-wise, so they are aligned on their index.
    weights : optional mapping from instrument name to weight. Only the
        entries whose key is in ``ofi_dict`` are used, and those are
        normalized to sum to 1; extra keys are ignored. Defaults to equal
        weights.

    Returns
    -------
    Series named 'ofi_cross_asset' holding the weighted sum per row. Missing
    values are skipped in the sum, but a row in which every instrument is
    missing is NaN rather than 0.

    Raises
    ------
    ValueError if fewer than two instruments are supplied, or if the weights
        of the supplied instruments sum to zero.
    KeyError if ``weights`` lacks an entry for one of the instruments.
    """
    # Need at least two assets
    if len(ofi_dict) < 2:
        raise ValueError(
            "Cross‐Asset OFI requires at least two instruments; "
            f"received {len(ofi_dict)}."
        )

    ofi_df = pd.concat(ofi_dict, axis=1)

    if weights is None:
        weights = {k: 1.0 / len(ofi_dict) for k in ofi_dict}
    missing = set(ofi_dict) - set(weights)
    if missing:
        raise KeyError(f"Weights missing for instruments: {missing}")

    # Normalize over the instruments actually aggregated. Including unrelated
    # keys in the total would leave the applied weights summing to less than 1.
    used_weights = {k: weights[k] for k in ofi_dict}
    total_w = sum(used_weights.values())
    if total_w == 0:
        raise ValueError("Sum of weights must be non-zero.")
    weight_series = pd.Series({k: v / total_w for k, v in used_weights.items()})

    weighted_ofi = ofi_df.mul(weight_series, axis=1)

    # min_count=1 keeps rows with no data at all as NaN instead of a
    # fabricated 0.0, which would read as perfectly balanced order flow.
    cross_ofi = weighted_ofi.sum(axis=1, min_count=1)
    cross_ofi.name = 'ofi_cross_asset'
    return cross_ofi


# Load the order-book sample and attach the best-level OFI.
df = pd.read_csv('first_25000_rows.csv')
df = df.assign(ofi_best=best_level_ofi(df))

# Per-level OFI is kept in its own frame because the integrated OFI is
# derived from it; both are then attached to the main frame.
ml_ofi = multi_level_ofi(df, levels=10)
df = df.join(ml_ofi).assign(ofi_integrated=integrated_ofi(ml_ofi))

# Sample usage of cross asset
# ofi_dict = {
#     'AAPL_stock': ofi_stock,   # e.g. the output of integrated_ofi()
#     'AAPL_fut':   ofi_fut,     # same shape & index
#     'SPY_etf':    ofi_spy
# }
#
# ofi_x2 = cross_asset_ofi(ofi_dict, weights={'AAPL_stock':0.5,'AAPL_fut':0.3,'SPY_etf':0.2})

In [None]:
# Export the OFI features to CSV.
# 'ofi_cross_lag1' is not produced by any cell visible in this notebook, so
# selecting it unconditionally raises KeyError on a fresh Restart & Run All.
# Only the requested columns that actually exist are written; if the column
# is created elsewhere it is still exported.
requested_cols = (
    ['ofi_best']
    + [f'ofi_level_{d}' for d in range(10)]
    + ['ofi_integrated', 'ofi_cross_lag1']
)
export_cols = [c for c in requested_cols if c in df.columns]
df[export_cols].to_csv('ofi_calculations.csv', index=False)