# Barra Factor Snapshot
Use this notebook to explore factor exposures and returns for a given month-end.

## Instructions
1. Update `AS_OF` below to the target rebalance date (month-end).
2. Refresh exposures via `python -m src.cli run-pipeline --date YYYY-MM-DD`, then run all cells.
3. Use the summary frames and charts to highlight exposure drift, imputation share, and factor performance.

In [None]:
import pandas as pd
import duckdb
import matplotlib.pyplot as plt

from src.config import ANALYTICS_DB

# Notebook-wide configuration: snapshot date, plot styling, and a read-only DB handle.
AS_OF = pd.Timestamp("2025-09-30").date()  # bound to the month_end_date filter in every query below
plt.style.use("seaborn-v0_8")
# Read-only: this notebook only queries the analytics database.
conn = duckdb.connect(database=ANALYTICS_DB.as_posix(), read_only=True)

In [None]:
# Per-factor summary of exposures for the AS_OF month-end: row count, mean,
# population standard deviation, and the number of rows whose flags contain "imputed".
style_summary_sql = """
    SELECT factor,
           COUNT(*) AS observations,
           AVG(exposure) AS avg_exposure,
           STDDEV_POP(exposure) AS std_exposure,
           SUM(CASE WHEN flags LIKE '%imputed%' THEN 1 ELSE 0 END) AS imputed_rows
    FROM analytics.style_factor_exposures
    WHERE month_end_date = ?
    GROUP BY factor
    ORDER BY factor
    """
style_summary = (
    conn.execute(style_summary_sql, [AS_OF])
    .fetchdf()
    # Share of each factor's rows flagged as imputed, as a 0-1 fraction.
    .assign(imputed_pct=lambda frame: frame["imputed_rows"] / frame["observations"])
)
style_summary

In [None]:
# Bar chart of the imputed share per factor, with a dashed red reference line.
fig, ax = plt.subplots(figsize=(10, 4))
style_summary.plot.bar(x="factor", y="imputed_pct", ax=ax, legend=False, color="#ff7f0e")
ax.set_ylabel("Imputed %")
ax.set_title(f"Imputation Share by Factor ({AS_OF})")
# NOTE(review): 0.3 is presumably the acceptable-imputation threshold — confirm and name it.
ax.axhline(0.3, color="red", linestyle="--", linewidth=1)
# Floor the upper limit at 0.35 so the reference line stays visible when all bars are low.
ax.set_ylim(0, max(0.35, style_summary["imputed_pct"].max() * 1.1))
# imputed_pct is a 0-1 fraction; render ticks as percentages so they agree with the
# "Imputed %" axis label (a str formatter is applied as a str.format template on `x`).
ax.yaxis.set_major_formatter("{x:.0%}")
plt.show()

In [None]:
# Factor returns stored for the AS_OF month-end, ordered by factor.
factor_returns_sql = (
    "SELECT factor, factor_return FROM analytics.factor_returns WHERE month_end_date = ? ORDER BY factor"
)
factor_returns = conn.execute(factor_returns_sql, [AS_OF]).fetchdf()
factor_returns

In [None]:
# Bar chart of the factor returns for the AS_OF month-end.
fig, ax = plt.subplots(figsize=(10, 4))
factor_returns.plot(kind="bar", x="factor", y="factor_return", ax=ax, color="#1f77b4", legend=False)
ax.axhline(0, color="black", linewidth=0.8)  # zero baseline separating gains from losses
ax.set(ylabel="Return", title=f"Factor Returns ({AS_OF})")
plt.show()

In [None]:
# Pull every (factor, exposure) row for the AS_OF month-end and draw one box per factor.
exposures_sql = "SELECT factor, exposure FROM analytics.style_factor_exposures WHERE month_end_date = ?"
exposures = conn.execute(exposures_sql, [AS_OF]).fetchdf()

fig, ax = plt.subplots(figsize=(12, 5))
exposures.boxplot(column="exposure", by="factor", rot=45, ax=ax)
# A grouped pandas boxplot adds its own figure-level title; blank it so only the axes title shows.
fig.suptitle("")
ax.set_title(f"Exposure Distribution by Factor ({AS_OF})")
ax.set_ylabel("Exposure")
plt.show()

In [None]:
# The 25 exposures with the largest magnitude for the AS_OF month-end, across all factors.
# factor and gvkey act as secondary sort keys so that rows tied on ABS(exposure) are
# ordered consistently rather than arbitrarily, keeping the LIMIT cut-off reproducible.
top_abs = conn.execute(
    """
    SELECT factor, gvkey, exposure, flags
    FROM analytics.style_factor_exposures
    WHERE month_end_date = ?
    ORDER BY ABS(exposure) DESC, factor, gvkey
    LIMIT 25
    """,
    [AS_OF],
).fetchdf()
top_abs