# Analysis
Derive multi-level Order Flow Imbalance (OFI) metrics (up to 5 levels) for each stock in the dataset. Integrate these multi-level OFIs into a single metric using Principal Component Analysis (PCA) or another dimensionality reduction method. Examine the contemporaneous cross-impact of OFI on short-term price changes across stocks. Evaluate the predictive power of lagged cross-asset OFI on future price changes (e.g., 1-minute and 5-minute horizons). Use regression models to assess the explanatory power of contemporaneous OFI and the predictive power of lagged OFI. Compare self-impact (within the same stock) vs. cross-impact (between stocks) in the models.

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

def compute_ofi(df, levels=5):
    """Build a per-level order-flow-imbalance table from book volumes.

    For each level ``k`` in ``1..levels`` the column ``OFI_k`` is the
    row-to-row change in ``bid_vol_k`` minus the row-to-row change in
    ``ask_vol_k``. Only the volume columns are read. The first row of every
    column is NaN because ``diff`` has no previous row to compare against.

    Args:
        df: DataFrame holding ``bid_vol_<k>`` and ``ask_vol_<k>`` columns
            for every requested level.
        levels: Number of book levels to process, starting at level 1.

    Returns:
        DataFrame with one ``OFI_<k>`` column per requested level.

    Raises:
        KeyError: If a required volume column is missing from ``df``.
    """
    return pd.DataFrame(
        {
            f"OFI_{depth}": df[f"bid_vol_{depth}"].diff().sub(
                df[f"ask_vol_{depth}"].diff()
            )
            for depth in range(1, levels + 1)
        }
    )

def cross_impact(ofi_data, price_changes):
    """Regress each stock's price change on the OFI series of all stocks.

    One ordinary-least-squares model is fitted per column of
    ``price_changes``. Every model uses the same regressors: the series in
    ``ofi_data`` placed side by side, with any row containing a NaN removed.

    NOTE: NaNs are dropped from the regressors and from each target
    independently, and the surviving rows are then paired by position, not
    by index label. Callers must make sure both sides describe the same
    observations in the same order.

    Args:
        ofi_data: Mapping of stock name to a pandas Series (or DataFrame) of
            OFI values; iteration order determines coefficient order.
        price_changes: DataFrame with one column of price changes per target.

    Returns:
        Dict mapping each target column to a dict with keys
        ``"coefficients"`` (the fitted ``coef_`` array) and ``"r_squared"``
        (the in-sample score of the fitted model).

    Raises:
        ValueError: If, after dropping NaNs, a target has a different number
            of rows than the regressor matrix.
    """
    results = {}
    if len(price_changes.columns) == 0:
        # Nothing to fit, so there is no need to build the design matrix.
        return results

    # The regressors are the same for every target, so build them once
    # instead of re-concatenating inside the loop.
    features = pd.concat(ofi_data.values(), axis=1).dropna().values

    for target in price_changes.columns:
        response = price_changes[target].dropna().values
        # Rows are paired positionally; a length mismatch means the two
        # sides no longer describe the same observations, so fail loudly
        # with the offending target named.
        if len(response) != len(features):
            raise ValueError(
                f"Cannot fit target {target!r}: {len(features)} OFI rows vs "
                f"{len(response)} price-change rows after dropping NaNs."
            )
        model = LinearRegression().fit(features, response)
        results[target] = {
            "coefficients": model.coef_,
            "r_squared": model.score(features, response),
        }
    return results

In [None]:
# Tickers analysed in this notebook; each one has a CSV under data/.
stocks = ["AAPL", "AMGN", "TSLA", "JPM", "XOM"]

# Load every ticker's CSV into a DataFrame, keyed by ticker symbol.
data = {
    ticker: pd.read_csv(f"data/{ticker}.csv")
    for ticker in stocks
}

## Explore Data

In [None]:
# exploratory data analysis

## Compute OFI Metrics

In [None]:
# Multi-level OFI table for every stock.
ofi_data = {stock: compute_ofi(data[stock]) for stock in stocks}

# Collapse each stock's OFI levels into a single series: the projection onto
# the first principal component. The estimator is refitted for each stock.
pca = PCA(n_components=1)
pca_ofi_data = {
    stock: pd.Series(
        pca.fit_transform(clean).ravel(),
        # Keep the row labels of the rows that survived dropna(). Without
        # them each stock is renumbered from 0 and rows from different
        # stocks no longer line up when the series are combined.
        index=clean.index,
        name="PCA_OFI",
    )
    for stock, clean in (
        (stock, ofi.dropna()) for stock, ofi in ofi_data.items()
    )
}

## Analyze Cross-Impact

In [None]:
# Row-to-row percentage change of the "price" column, one column per stock.
price_changes = pd.DataFrame(
    {
        ticker: data[ticker]["price"].pct_change()
        for ticker in stocks
    }
)

# Fit one regression per stock using the PCA-integrated OFI of all stocks.
cross_impact_results = cross_impact(pca_ofi_data, price_changes)

In [None]:
from pathlib import Path

# Write the regression results as a table with one column per target stock.
# to_csv does not create missing directories, so make sure results/ exists.
output_path = Path("results/cross_impact_results.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame(cross_impact_results).to_csv(output_path)