# P&L Attribution

Attribution analysis for the WolfpackTrend strategy using position weights and daily returns.

**Data Sources:**
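- `wolfpack/positions.csv` - Daily positions with weights
- `wolfpack/daily_snapshots.csv` - Daily NAV and P&L
- `wolfpack/slippage.csv` - Per-order slippage (optional; the slippage overlay is skipped without it)
- `wolfpack/signals.csv` - Signal log (optional; horizon attribution is skipped without it)
- `wolfpack/trades.csv` - Closed trades (optional; falls back to a local trades export if the key is missing)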

**Analysis:**
- Per-symbol P&L contribution based on weights
- Daily slippage cost overlay
- Sector/symbol contribution breakdown

**Prerequisites:** Run the WolfpackTrend backtest first to generate ObjectStore data.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display
from pathlib import Path

# Pandas display options for notebook output: no cap on columns or line
# width, and at most 50 rows per displayed frame.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_rows', 50)

# Plot defaults shared by every chart below (individual cells may override
# the figure size).
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

from QuantConnect import *
from QuantConnect.Research import QuantBook

# Research-environment handle; read_csv_from_store reads the ObjectStore
# through this object.
qb = QuantBook()
print("QuantBook initialized")


def read_csv_from_store(key):
    """Load the CSV stored under ``key`` in the ObjectStore as a DataFrame.

    Returns ``None`` (after printing a diagnostic) when the key is absent,
    its content is empty, or any exception is raised while reading or
    parsing. Callers treat ``None`` as "data not available".
    """
    try:
        if qb.ObjectStore.ContainsKey(key):
            raw_text = qb.ObjectStore.Read(key)
            if raw_text:
                return pd.read_csv(StringIO(raw_text))
            print(f"Empty ObjectStore key: {key}")
        else:
            print(f"ObjectStore key not found: {key}")
    except Exception as e:
        # Best-effort loader: report the problem and fall through to None.
        print(f"Error reading {key}: {e}")
    return None


def normalize_trades(df):
    """Return a copy of a trades frame with canonical column names added.

    The input may come from differently formatted exports. Where a canonical
    column (``symbol``, ``date``, ``realized_pnl``, ``fees``) is absent it is
    filled from the first alternative column that exists; if no alternative
    exists the column is left out, except ``fees`` which defaults to 0.0.
    ``date`` is truncated to the calendar day and stored as a datetime.
    ``None`` is passed through unchanged and the input frame is not mutated.
    """
    if df is None:
        return None
    trades = df.copy()

    def _first_present(candidates):
        # First candidate column name that exists in the frame, else None.
        return next((name for name in candidates if name in trades.columns), None)

    # Symbol
    if 'symbol' not in trades.columns:
        source = _first_present(('Symbols', 'ticker'))
        if source is not None:
            trades['symbol'] = trades[source]

    # Date: prefer the exit timestamp, then fall back to the entry timestamp.
    if 'date' not in trades.columns:
        source = _first_present(('Exit Time', 'exit_time', 'Entry Time'))
        if source is not None:
            trades['date'] = pd.to_datetime(trades[source]).dt.date
    if 'date' in trades.columns:
        trades['date'] = pd.to_datetime(trades['date'])

    # Realized P&L
    if 'realized_pnl' not in trades.columns:
        source = _first_present(('P&L', 'pnl'))
        if source is not None:
            trades['realized_pnl'] = trades[source]

    # Fees
    if 'fees' not in trades.columns:
        if 'Fees' in trades.columns:
            trades['fees'] = trades['Fees']
        else:
            trades['fees'] = 0.0

    return trades


## Load Data

In [None]:
# Pull each strategy log from the ObjectStore. A value of None means the key
# was missing, empty, or unreadable (read_csv_from_store prints the reason).
df_positions = read_csv_from_store("wolfpack/positions.csv")
df_snapshots = read_csv_from_store("wolfpack/daily_snapshots.csv")
df_slippage = read_csv_from_store("wolfpack/slippage.csv")
df_signals = read_csv_from_store("wolfpack/signals.csv")
df_trades = read_csv_from_store("wolfpack/trades.csv")

# Parse dates
if df_positions is not None:
    df_positions['date'] = pd.to_datetime(df_positions['date'])
    print(f"Positions: {len(df_positions)} records")
    print(f"  Columns: {list(df_positions.columns)}")
    if 'daily_dividends' in df_positions.columns:
        # Unparseable dividend values become NaN instead of raising.
        df_positions['daily_dividends'] = pd.to_numeric(df_positions['daily_dividends'], errors='coerce')
else:
    print("WARNING: positions.csv not found - P&L attribution will be limited")

if df_snapshots is not None:
    df_snapshots['date'] = pd.to_datetime(df_snapshots['date'])
    print(f"Daily snapshots: {len(df_snapshots)} records")

if df_slippage is not None:
    df_slippage['date'] = pd.to_datetime(df_slippage['date'])
    print(f"Slippage: {len(df_slippage)} records")
else:
    print("Note: slippage.csv not found - slippage overlay will be skipped")

if df_signals is not None:
    df_signals['date'] = pd.to_datetime(df_signals['date'])
    print(f"Signals: {len(df_signals)} records")
    print(f"  Columns: {list(df_signals.columns)}")
else:
    print("Note: signals.csv not found - horizon attribution will be skipped")

# Fallback to local trades export if ObjectStore is missing
if df_trades is None:
    local_trades_path = Path("../Ugly Apricot Buffalo_trades.csv")
    if local_trades_path.exists():
        df_trades = pd.read_csv(local_trades_path)
        print(f"Loaded local trades: {local_trades_path}")
    else:
        print("Note: trades.csv not found - realized P&L will not be included")

# Map whichever export format was loaded onto the canonical trade columns.
df_trades = normalize_trades(df_trades)
if df_trades is not None:
    print(f"Trades: {len(df_trades)} records (realized P&L from closed positions)")

# Initialize shared vars to avoid NameError in later cells. Every name that a
# later cell reads across cell boundaries is reset here, so re-running this
# cell also clears results left over from a previous attribution run.
symbol_col = None
weight_col = None
pnl_col = None
daily_position_pnl = None
attribution_label = None
merged = None
symbol_pnl = pd.Series(dtype=float)
fees_already_included = False


## Portfolio P&L Overview

In [None]:
if df_snapshots is not None:
    # Fill in P&L columns the snapshot export did not supply: the daily figure
    # is the row-over-row NAV difference, the cumulative one its running sum.
    if 'daily_pnl' not in df_snapshots.columns:
        df_snapshots['daily_pnl'] = df_snapshots['nav'].diff()
    if 'cumulative_pnl' not in df_snapshots.columns:
        df_snapshots['cumulative_pnl'] = df_snapshots['daily_pnl'].cumsum()

    _dates = df_snapshots['date']
    _daily = df_snapshots['daily_pnl']
    _running = df_snapshots['cumulative_pnl']

    print("\nPortfolio P&L Summary:")
    print("=" * 60)
    for _caption, _amount in (
        ("Total P&L", _daily.sum()),
        ("Best day", _daily.max()),
        ("Worst day", _daily.min()),
        ("Average daily P&L", _daily.mean()),
    ):
        print(f"{_caption}: ${_amount:,.2f}")

    # Two stacked panels sharing the date axis: daily bars on top, the
    # cumulative curve underneath.
    fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)
    _daily_ax, _cumulative_ax = axes

    _bar_colors = np.where(_daily >= 0, 'green', 'red')
    _daily_ax.bar(_dates, _daily, color=_bar_colors, alpha=0.7)
    _daily_ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
    _daily_ax.set_title('Daily P&L', fontsize=14, fontweight='bold')
    _daily_ax.set_ylabel('P&L ($)')
    _daily_ax.grid(True, alpha=0.3)

    _cumulative_ax.plot(_dates, _running, linewidth=2, color='steelblue')
    # Shade the area to the zero line: green while in profit, red while in loss.
    for _mask, _shade in ((_running >= 0, 'green'), (_running < 0, 'red')):
        _cumulative_ax.fill_between(_dates, 0, _running, where=_mask, alpha=0.3, color=_shade)
    _cumulative_ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
    _cumulative_ax.set_title('Cumulative P&L', fontsize=14, fontweight='bold')
    _cumulative_ax.set_xlabel('Date')
    _cumulative_ax.set_ylabel('Cumulative P&L ($)')
    _cumulative_ax.grid(True, alpha=0.3)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

## Position-Based P&L Attribution

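Compute per-symbol P&L contribution. When `positions.csv` already carries a logged per-position P&L column (`daily_total_net_pnl`, then `daily_pnl`), that column is used directly; otherwise the contribution is estimated from the prior day's position weight (or market value) and the price return.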

In [None]:
# Per-symbol and per-day P&L attribution.
#
# Results are left in notebook-level names read by later cells:
#   symbol_col / weight_col  - detected column names (None if absent)
#   pnl_col                  - df_positions column holding attributed P&L
#   symbol_pnl               - total attributed P&L per symbol, descending
#   daily_position_pnl       - frame with columns ['date', 'position_pnl']
#   attribution_label        - human-readable name of the method used
#   fees_already_included    - True when the P&L column is already net of fees
if df_positions is not None and df_snapshots is not None:
    # Identify columns
    symbol_col = None
    for col in ['symbol', 'ticker', 'underlying']:
        if col in df_positions.columns:
            symbol_col = col
            break

    weight_col = None
    for col in ['weight', 'portfolio_weight']:
        if col in df_positions.columns:
            weight_col = col
            break

    print(f"Symbol column: {symbol_col}")
    print(f"Weight column: {weight_col}")

    symbol_pnl = pd.Series(dtype=float)
    pnl_col = None
    daily_position_pnl = None
    attribution_label = None

    # Set to True to force the weights * returns estimate even when logged
    # per-position P&L columns exist.
    USE_RETURNS_ATTRIBUTION = False
    fees_already_included = False

    # Prefer daily total net P&L if available (realized + unrealized - fees)
    if symbol_col and (not USE_RETURNS_ATTRIBUTION) and ('daily_total_net_pnl' in df_positions.columns):
        pnl_col = 'daily_total_net_pnl'
        attribution_label = 'positions daily_total_net_pnl (realized + unrealized - fees)'
        fees_already_included = True
        print(f"P&L attribution method: {attribution_label}")

        symbol_pnl = df_positions.groupby(symbol_col)[pnl_col].sum().sort_values(ascending=False)
        print()
        print("Total P&L by Symbol (attributed):")
        print("=" * 60)
        display(symbol_pnl.head(20).to_frame('Total P&L'))

        daily_position_pnl = df_positions.groupby('date')[pnl_col].sum().reset_index()
        daily_position_pnl.columns = ['date', 'position_pnl']
        print()
        print(f"Total attributed from held positions: ${daily_position_pnl['position_pnl'].sum():,.2f}")

        if df_trades is not None and 'realized_pnl' in df_trades.columns and 'symbol' in df_trades.columns:
            realized_by_symbol = df_trades.groupby('symbol')['realized_pnl'].sum()
            print()
            print(f"Total realized P&L from trades file: ${realized_by_symbol.sum():,.2f}")
    # Prefer daily MTM P&L if available (aligns with EOD holdings)
    elif symbol_col and (not USE_RETURNS_ATTRIBUTION) and ('daily_pnl' in df_positions.columns):
        pnl_col = 'daily_pnl'
        attribution_label = 'positions daily_pnl (unrealized delta)'
        print(f"P&L attribution method: {attribution_label}")

        symbol_pnl = df_positions.groupby(symbol_col)[pnl_col].sum().sort_values(ascending=False)
        print()
        print("Total P&L by Symbol (attributed):")
        print("=" * 60)
        display(symbol_pnl.head(20).to_frame('Total P&L'))

        daily_position_pnl = df_positions.groupby('date')[pnl_col].sum().reset_index()
        daily_position_pnl.columns = ['date', 'position_pnl']
        print()
        print(f"Total attributed from held positions: ${daily_position_pnl['position_pnl'].sum():,.2f}")

        if df_trades is not None and 'realized_pnl' in df_trades.columns and 'symbol' in df_trades.columns:
            realized_by_symbol = df_trades.groupby('symbol')['realized_pnl'].sum()
            print()
            print(f"Total realized P&L from trades file: ${realized_by_symbol.sum():,.2f}")
    else:
        # Ensure price column for returns
        if 'price' not in df_positions.columns:
            if 'market_value' in df_positions.columns and 'quantity' in df_positions.columns:
                df_positions['price'] = df_positions['market_value'] / df_positions['quantity']
                print("Derived price from market_value / quantity")
            else:
                print("WARNING: No price or market_value/quantity columns; cannot compute returns")

        # Ensure market_value column
        if 'market_value' not in df_positions.columns:
            if weight_col and 'nav' in df_snapshots.columns:
                df_positions = df_positions.merge(df_snapshots[['date', 'nav']], on='date', how='left')
                df_positions['market_value'] = df_positions[weight_col] * df_positions['nav']
                print("Derived market_value from weight * nav")
            elif 'price' in df_positions.columns and 'quantity' in df_positions.columns:
                df_positions['market_value'] = df_positions['price'] * df_positions['quantity']
                print("Derived market_value from price * quantity")

        # Primary attribution: weights + daily returns
        if symbol_col and 'price' in df_positions.columns:
            # Sorting by date first keeps each symbol's rows chronological, so
            # shift(1) within a symbol yields that symbol's previous logged row.
            df_positions = df_positions.sort_values(['date', symbol_col])
            df_positions['prev_price'] = df_positions.groupby(symbol_col)['price'].shift(1)
            df_positions['return'] = df_positions['price'] / df_positions['prev_price'] - 1

            pnl_method = None
            if weight_col and 'nav' in df_snapshots.columns:
                # P&L = previous weight * previous NAV * return over the row.
                nav_by_date = df_snapshots[['date', 'nav']].sort_values('date').copy()
                nav_by_date['prev_nav'] = nav_by_date['nav'].shift(1)
                df_positions = df_positions.merge(nav_by_date[['date', 'prev_nav']], on='date', how='left')
                df_positions['prev_weight'] = df_positions.groupby(symbol_col)[weight_col].shift(1)
                df_positions['position_pnl'] = df_positions['prev_weight'] * df_positions['prev_nav'] * df_positions['return']
                pnl_method = "weights * returns (prev weight * prev NAV)"
            elif 'market_value' in df_positions.columns:
                df_positions['prev_market_value'] = df_positions.groupby(symbol_col)['market_value'].shift(1)
                df_positions['position_pnl'] = df_positions['prev_market_value'] * df_positions['return']
                pnl_method = "returns * prior market value"

            if pnl_method:
                # The first row of each symbol has no previous row; count it as 0.
                df_positions['position_pnl'] = df_positions['position_pnl'].fillna(0)
                pnl_col = 'position_pnl'
                attribution_label = pnl_method
                print(f"P&L attribution method: {pnl_method}")

                symbol_pnl = df_positions.groupby(symbol_col)[pnl_col].sum().sort_values(ascending=False)

                print()
                print("Total P&L by Symbol (attributed):")
                print("=" * 60)
                display(symbol_pnl.head(20).to_frame('Total P&L'))

                daily_position_pnl = df_positions.groupby('date')[pnl_col].sum().reset_index()
                daily_position_pnl.columns = ['date', 'position_pnl']
                print()
                print(f"Total attributed from held positions: ${daily_position_pnl['position_pnl'].sum():,.2f}")

                if df_trades is not None and 'realized_pnl' in df_trades.columns and 'symbol' in df_trades.columns:
                    realized_by_symbol = df_trades.groupby('symbol')['realized_pnl'].sum()
                    print()
                    print(f"Total realized P&L from trades file: ${realized_by_symbol.sum():,.2f}")
            else:
                print()
                print("WARNING: Unable to compute returns-based attribution")
        else:
            print()
            print("WARNING: Missing symbol or price data for attribution")

    # Fallbacks if returns-based attribution isn't possible
    if pnl_col is None:
        if 'daily_total_net_pnl' in df_positions.columns:
            pnl_col = 'daily_total_net_pnl'
            attribution_label = 'daily_total_net_pnl column (realized + unrealized - fees)'
            fees_already_included = True
            print("Using daily_total_net_pnl column from positions.csv")
        elif 'daily_pnl' in df_positions.columns:
            pnl_col = 'daily_pnl'
            attribution_label = 'daily_pnl column'
            print("Using daily_pnl column from positions.csv")
        elif 'pnl' in df_positions.columns:
            pnl_col = 'pnl'
            attribution_label = 'pnl column'
        elif 'mtm_pnl' in df_positions.columns:
            pnl_col = 'mtm_pnl'
            attribution_label = 'mtm_pnl column'

        if pnl_col:
            # This path is commonly reached because no symbol column exists;
            # grouping by None raises, so the per-symbol table is produced
            # only when there is a column to group on. The per-day totals
            # below need only 'date' and are always computed.
            if symbol_col:
                symbol_pnl = df_positions.groupby(symbol_col)[pnl_col].sum().sort_values(ascending=False)
                print()
                print("Total P&L by Symbol (fallback):")
                print("=" * 60)
                display(symbol_pnl.head(20).to_frame('Total P&L'))
            else:
                print()
                print("WARNING: No symbol column found - skipping per-symbol breakdown")

            daily_position_pnl = df_positions.groupby('date')[pnl_col].sum().reset_index()
            daily_position_pnl.columns = ['date', 'position_pnl']
            print()
            print(f"Total attributed from held positions: ${daily_position_pnl['position_pnl'].sum():,.2f}")

        elif 'unrealized_pnl' in df_positions.columns and symbol_col:
            print()
            print("No daily_pnl column - computing from unrealized_pnl changes...")
            print("NOTE: Re-run backtest with updated logger for accurate attribution")

            positions = df_positions[['date', symbol_col, 'unrealized_pnl']].copy()
            positions = positions.sort_values(['date', symbol_col])
            positions['prev_unrealized'] = positions.groupby(symbol_col)['unrealized_pnl'].shift(1)
            positions['daily_pnl'] = positions['unrealized_pnl'] - positions['prev_unrealized']
            # A symbol's first row has no predecessor: its whole unrealized
            # P&L is counted as that day's change.
            positions['daily_pnl'] = positions['daily_pnl'].fillna(positions['unrealized_pnl'])

            pnl_col = 'daily_pnl'
            attribution_label = 'unrealized_pnl delta'
            df_positions = df_positions.merge(
                positions[['date', symbol_col, 'daily_pnl']],
                on=['date', symbol_col],
                how='left'
            )

            symbol_pnl = df_positions.groupby(symbol_col)['daily_pnl'].sum().sort_values(ascending=False)
            print()
            print("Estimated P&L by Symbol:")
            print("=" * 60)
            display(symbol_pnl.head(20).to_frame('Total P&L'))

            daily_position_pnl = df_positions.groupby('date')[pnl_col].sum().reset_index()
            daily_position_pnl.columns = ['date', 'position_pnl']
        else:
            print()
            print("WARNING: No P&L data available for attribution")
else:
    print("Positions data not available for attribution analysis")
    pnl_col = None


## P&L Reconciliation: Attributed vs NAV Change

Compare the P&L we're accounting for (from positions) against the actual daily NAV change to identify any gaps.

In [None]:
# Reconcile attributed P&L against the daily NAV change from the snapshots.
# Builds `reconcile` (one row per snapshot date) with columns nav_change,
# position_pnl, realized_pnl, fees, dividends, attributed_net and unexplained,
# prints a summary, and plots the daily and cumulative comparison.
if df_snapshots is not None and (
    ('daily_position_pnl' in dir() and daily_position_pnl is not None)
    or (df_positions is not None and pnl_col and pnl_col in df_positions.columns)
):
    # Calculate NAV change from snapshots
    df_snapshots_sorted = df_snapshots.sort_values('date').copy()
    if 'daily_pnl' in df_snapshots_sorted.columns:
        df_snapshots_sorted['nav_change'] = df_snapshots_sorted['daily_pnl']
    else:
        df_snapshots_sorted['nav_change'] = df_snapshots_sorted['nav'].diff()

    # Daily P&L from attribution
    if 'daily_position_pnl' in dir() and daily_position_pnl is not None:
        daily_attributed = daily_position_pnl.copy()
    else:
        daily_position_pnl = df_positions.groupby('date')[pnl_col].sum().reset_index()
        daily_position_pnl.columns = ['date', 'position_pnl']
        daily_attributed = daily_position_pnl.copy()

    fees_already_included = fees_already_included if 'fees_already_included' in dir() else False

    # normalize_trades leaves 'date' out when the export has no usable
    # timestamp column; per-day trade aggregation is only possible with it.
    trades_have_dates = df_trades is not None and 'date' in df_trades.columns

    # Optional realized P&L from trades file (not included in MTM attribution)
    if trades_have_dates and 'realized_pnl' in df_trades.columns:
        daily_realized = df_trades.groupby('date')['realized_pnl'].sum().reset_index()
        daily_realized.columns = ['date', 'realized_pnl']
        daily_attributed = daily_attributed.merge(daily_realized, on='date', how='left')

    if 'realized_pnl' not in daily_attributed.columns:
        daily_attributed['realized_pnl'] = 0.0
    else:
        daily_attributed['realized_pnl'] = daily_attributed['realized_pnl'].fillna(0.0)

    # Fees from positions (preferred) or trades (subtract from NAV change)
    if not fees_already_included:
        if df_positions is not None and 'daily_fees' in df_positions.columns:
            daily_fees = df_positions.groupby('date')['daily_fees'].sum().reset_index()
            daily_fees.columns = ['date', 'fees']
            daily_attributed = daily_attributed.merge(daily_fees, on='date', how='left')
        elif trades_have_dates and 'fees' in df_trades.columns:
            daily_fees = df_trades.groupby('date')['fees'].sum().reset_index()
            daily_fees.columns = ['date', 'fees']
            daily_attributed = daily_attributed.merge(daily_fees, on='date', how='left')

    if 'fees' not in daily_attributed.columns:
        daily_attributed['fees'] = 0.0
    else:
        daily_attributed['fees'] = daily_attributed['fees'].fillna(0.0)

    # Dividends from positions (if available)
    if df_positions is not None and 'daily_dividends' in df_positions.columns:
        daily_dividends = df_positions.groupby('date')['daily_dividends'].sum().reset_index()
        daily_dividends.columns = ['date', 'dividends']
        daily_attributed = daily_attributed.merge(daily_dividends, on='date', how='left')

    if 'dividends' not in daily_attributed.columns:
        daily_attributed['dividends'] = 0.0
    else:
        daily_attributed['dividends'] = daily_attributed['dividends'].fillna(0.0)

    # Net attributed P&L: position P&L, minus fees unless the P&L column is
    # already net of them. Realized P&L from the trades file and dividends are
    # carried along for display only and are not added in.
    daily_attributed['attributed_pnl'] = daily_attributed['position_pnl']
    if fees_already_included:
        daily_attributed['fees'] = 0.0
        daily_attributed['attributed_net'] = daily_attributed['attributed_pnl']
    else:
        daily_attributed['attributed_net'] = daily_attributed['attributed_pnl'] - daily_attributed['fees']

    # Merge with NAV changes
    reconcile = df_snapshots_sorted[['date', 'nav', 'nav_change']].merge(
        daily_attributed, on='date', how='left'
    )
    # Snapshot dates with no attribution row count as zero attributed P&L.
    reconcile['position_pnl'] = reconcile['position_pnl'].fillna(0)
    reconcile['realized_pnl'] = reconcile['realized_pnl'].fillna(0)
    reconcile['attributed_pnl'] = reconcile['attributed_pnl'].fillna(0)
    reconcile['fees'] = reconcile['fees'].fillna(0)
    reconcile['attributed_net'] = reconcile['attributed_net'].fillna(0)
    reconcile['dividends'] = reconcile['dividends'].fillna(0)

    # Calculate unexplained (dividends informational under Adjusted)
    reconcile['unexplained'] = reconcile['nav_change'] - reconcile['attributed_net']

    # Summary statistics
    label = attribution_label if 'attribution_label' in dir() and attribution_label else "held positions"
    print("=" * 80)
    print("P&L RECONCILIATION: Attributed vs NAV Change")
    print("=" * 80)
    print()
    print(f"Total NAV Change: ${reconcile['nav_change'].sum():,.2f}")
    print(f"  - From held positions ({label}): ${reconcile['position_pnl'].sum():,.2f}")
    print(f"  - Dividends (informational; adjusted pricing): ${reconcile['dividends'].sum():,.2f}")
    if trades_have_dates and 'realized_pnl' in df_trades.columns:
        print(f"  - Realized P&L in trades file (not included): ${reconcile['realized_pnl'].sum():,.2f}")
    if fees_already_included:
        print("  - Fees: included in position P&L")
    elif df_positions is not None and 'daily_fees' in df_positions.columns:
        print(f"  - Fees (from positions file): ${reconcile['fees'].sum():,.2f}")
    elif trades_have_dates and 'fees' in df_trades.columns:
        print(f"  - Fees (from trades file): ${reconcile['fees'].sum():,.2f}")
    if fees_already_included:
        print(f"Total Attributed Net ({label}): ${reconcile['attributed_net'].sum():,.2f}")
    else:
        print(f"Total Attributed Net ({label} - fees): ${reconcile['attributed_net'].sum():,.2f}")
    print(f"Total Unexplained: ${reconcile['unexplained'].sum():,.2f}")

    nav_change_total = reconcile['nav_change'].sum()
    if nav_change_total != 0:
        pct_unexplained = 100 * abs(reconcile['unexplained'].sum()) / abs(nav_change_total)
        print()
        print(f"Unexplained as % of NAV Change: {pct_unexplained:.2f}%")
        if pct_unexplained < 1:
            print("GOOD: Reconciliation is within 1%")
        elif pct_unexplained < 5:
            print("WARNING: Reconciliation gap is 1-5%")
        else:
            print("ALERT: Significant reconciliation gap (>5%)")
            print()
            print("Possible causes of gap:")
            print("  - Rebalances or intraday trades (weights assumed from prior close)")
            print("  - Slippage, fees, interest, dividends")
            print("  - Dividends on adjusted pricing (NAV may not move on ex-date)")
            print("  - Data logged at different times than NAV snapshot")

    # Plot reconciliation
    fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True)

    # Plot 1: NAV Change vs Attributed Net P&L
    axes[0].plot(reconcile['date'], reconcile['nav_change'], 'b-', linewidth=2, label='NAV Change', alpha=0.8)
    axes[0].plot(reconcile['date'], reconcile['attributed_net'], 'g--', linewidth=2, label='Attributed Net P&L', alpha=0.8)
    axes[0].set_title('Daily P&L Reconciliation: NAV Change vs Attributed Net', fontsize=14, fontweight='bold')
    axes[0].set_ylabel('P&L ($)')
    axes[0].legend(loc='upper left')
    axes[0].axhline(y=0, color='k', linestyle='-', alpha=0.3)
    axes[0].grid(True, alpha=0.3)

    # Plot 2: Unexplained P&L
    axes[1].bar(reconcile['date'], reconcile['unexplained'],
                color=np.where(reconcile['unexplained'] >= 0, 'orange', 'purple'), alpha=0.7)
    axes[1].set_title('Daily Unexplained P&L (NAV Change - Attributed Net)', fontsize=14, fontweight='bold')
    axes[1].set_ylabel('Unexplained P&L ($)')
    axes[1].axhline(y=0, color='k', linestyle='-', alpha=0.3)
    axes[1].grid(True, alpha=0.3)

    # Plot 3: Cumulative comparison
    reconcile['cumulative_nav_change'] = reconcile['nav_change'].cumsum()
    reconcile['cumulative_attributed_net'] = reconcile['attributed_net'].cumsum()
    reconcile['cumulative_unexplained'] = reconcile['unexplained'].cumsum()

    axes[2].plot(reconcile['date'], reconcile['cumulative_nav_change'], 'b-', linewidth=2, label='Cumulative NAV Change')
    axes[2].plot(reconcile['date'], reconcile['cumulative_attributed_net'], 'g--', linewidth=2, label='Cumulative Attributed Net')
    axes[2].fill_between(reconcile['date'], reconcile['cumulative_attributed_net'], reconcile['cumulative_nav_change'],
                         alpha=0.3, color='red', label='Cumulative Gap')
    axes[2].set_title('Cumulative P&L: NAV Change vs Attributed Net', fontsize=14, fontweight='bold')
    axes[2].set_xlabel('Date')
    axes[2].set_ylabel('Cumulative P&L ($)')
    axes[2].legend(loc='upper left')
    axes[2].axhline(y=0, color='k', linestyle='-', alpha=0.3)
    axes[2].grid(True, alpha=0.3)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

    # Show days with largest unexplained P&L (only when some day is off by
    # more than $100 in absolute terms)
    if reconcile['unexplained'].abs().max() > 100:
        print()
        print("Days with largest unexplained P&L (top 10):")
        print("=" * 80)
        top_unexplained = reconcile.copy()
        top_unexplained['unexplained_abs'] = top_unexplained['unexplained'].abs()
        top_unexplained = top_unexplained.nlargest(10, 'unexplained_abs')[
            ['date', 'nav_change', 'position_pnl', 'fees', 'attributed_net', 'unexplained']
        ]
        display(top_unexplained)

else:
    print("Insufficient data for P&L reconciliation")
    print(f"  pnl_col: {pnl_col if 'pnl_col' in dir() else 'not set'}")
    if df_positions is not None:
        print(f"  positions columns: {list(df_positions.columns)}")


## Top/Bottom Contributors

In [None]:
# Bar charts of the ten largest and ten smallest per-symbol P&L totals, plus
# a win/loss summary. symbol_pnl is expected to be sorted descending, so
# head() holds the best contributors and tail() the worst.
if 'symbol_pnl' in dir() and len(symbol_pnl) > 0:
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Top contributors
    top_10 = symbol_pnl.head(10)
    axes[0].barh(top_10.index, top_10.values, color='green', alpha=0.7)
    axes[0].set_title('Top 10 Contributors (P&L)', fontsize=12, fontweight='bold')
    axes[0].set_xlabel('P&L ($)')
    axes[0].invert_yaxis()
    axes[0].grid(True, alpha=0.3, axis='x')

    # Bottom contributors (overlaps the top chart when there are fewer than
    # 20 symbols)
    bottom_10 = symbol_pnl.tail(10)
    axes[1].barh(bottom_10.index, bottom_10.values, color='red', alpha=0.7)
    axes[1].set_title('Bottom 10 Contributors (P&L)', fontsize=12, fontweight='bold')
    axes[1].set_xlabel('P&L ($)')
    axes[1].invert_yaxis()
    axes[1].grid(True, alpha=0.3, axis='x')

    plt.tight_layout()
    plt.show()

    # Summary
    print("\nP&L Attribution Summary:")
    print("=" * 60)
    total_pnl = symbol_pnl.sum()
    positive_pnl = symbol_pnl[symbol_pnl > 0].sum()
    negative_pnl = symbol_pnl[symbol_pnl < 0].sum()

    print(f"Total P&L: ${total_pnl:,.2f}")
    print(f"Winning positions P&L: ${positive_pnl:,.2f}")
    print(f"Losing positions P&L: ${negative_pnl:,.2f}")
    # Keep the label when there are no losing symbols, so the line is still
    # identifiable in the output.
    if negative_pnl != 0:
        print(f"Win/Loss ratio: {abs(positive_pnl/negative_pnl):.2f}")
    else:
        print("Win/Loss ratio: N/A")
    print(f"Symbols with profit: {(symbol_pnl > 0).sum()} / {len(symbol_pnl)}")

## P&L Contribution Over Time by Symbol

In [None]:
if df_positions is not None and symbol_col and pnl_col:
    # The ten symbols with the largest absolute total P&L, so big losers are
    # shown alongside big winners.
    top_symbols = symbol_pnl.abs().nlargest(10).index.tolist()

    # One row per (date, symbol) holding that day's summed P&L.
    daily_symbol_pnl = (
        df_positions
        .groupby(['date', symbol_col])[pnl_col]
        .sum()
        .reset_index()
    )

    # Wide layout: dates down the rows, one column per top symbol, 0 where a
    # symbol has no row on a date.
    _is_top_symbol = daily_symbol_pnl[symbol_col].isin(top_symbols)
    pivot = daily_symbol_pnl[_is_top_symbol].pivot_table(
        index='date', columns=symbol_col, values=pnl_col, fill_value=0
    ).sort_index()

    # Running total per symbol
    cumulative = pivot.cumsum()

    fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

    # Both panels are line plots (a stacked area cannot represent mixed +/-
    # values) and share the same decoration; only data, line width and
    # captions differ.
    _panels = (
        (axes[0], pivot, 1.5,
         'Daily P&L by Symbol (Top 10 by Absolute Contribution)', 'Daily P&L ($)'),
        (axes[1], cumulative, 2,
         'Cumulative P&L by Symbol (Top 10)', 'Cumulative P&L ($)'),
    )
    for _panel_ax, _frame, _width, _title, _ylabel in _panels:
        _frame.plot(ax=_panel_ax, linewidth=_width, alpha=0.8)
        _panel_ax.set_title(_title, fontsize=14, fontweight='bold')
        _panel_ax.set_ylabel(_ylabel)
        _panel_ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
        _panel_ax.legend(loc='upper left', ncol=2, fontsize=8)
        _panel_ax.grid(True, alpha=0.3)
    axes[1].set_xlabel('Date')

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

## Slippage Cost Overlay

In [None]:
# Overlay daily slippage on net P&L: gross P&L is reported as net P&L plus
# the summed slippage for the day. Leaves the per-day frame in `merged`.
if df_slippage is not None and df_snapshots is not None:
    # Find slippage column
    slippage_col = None
    for col in ['slippage', 'slippage_dollars', 'cost']:
        if col in df_slippage.columns:
            slippage_col = col
            break

    if slippage_col is None:
        # Try to compute from expected vs fill price. The candidate pairs are
        # flattened into a single loop so the first matching pair wins; with
        # nested loops the inner break would let a later expected-price
        # column silently overwrite the result.
        price_pairs = [
            (exp_col, fill_col)
            for exp_col in ['expected_price', 'signal_price']
            for fill_col in ['fill_price', 'execution_price']
        ]
        for exp_col, fill_col in price_pairs:
            if exp_col in df_slippage.columns and fill_col in df_slippage.columns:
                # NOTE(review): this is a price difference per fill and is not
                # scaled by order quantity or signed by side - confirm it is
                # the intended dollar measure.
                df_slippage['slippage_dollars'] = df_slippage[fill_col] - df_slippage[exp_col]
                slippage_col = 'slippage_dollars'
                break

    # Net daily P&L per snapshot date. Use the logged column when present and
    # otherwise derive it from NAV differences, so this cell does not depend
    # on an earlier cell having added daily_pnl to df_snapshots.
    if 'daily_pnl' in df_snapshots.columns:
        snapshot_pnl = df_snapshots[['date', 'daily_pnl']]
    elif 'nav' in df_snapshots.columns:
        snapshot_pnl = df_snapshots[['date']].assign(daily_pnl=df_snapshots['nav'].diff())
    else:
        snapshot_pnl = None

    if slippage_col and snapshot_pnl is not None:
        # Daily slippage
        daily_slippage = df_slippage.groupby('date')[slippage_col].sum().reset_index()
        daily_slippage.columns = ['date', 'daily_slippage']

        # Merge with P&L data; days without orders carry zero slippage
        merged = snapshot_pnl.merge(daily_slippage, on='date', how='left')
        merged['daily_slippage'] = merged['daily_slippage'].fillna(0)

        # Gross P&L (before slippage)
        merged['gross_pnl'] = merged['daily_pnl'] + merged['daily_slippage']

        total_slippage = merged['daily_slippage'].sum()
        total_gross = merged['gross_pnl'].sum()

        print("\nSlippage Impact:")
        print("=" * 60)
        print(f"Total slippage cost: ${total_slippage:,.2f}")
        print(f"Net P&L: ${merged['daily_pnl'].sum():,.2f}")
        print(f"Gross P&L (before slippage): ${total_gross:,.2f}")
        # Keep the label when the ratio is undefined so the line is still
        # identifiable in the output.
        if total_gross != 0:
            print(f"Slippage as % of gross P&L: {total_slippage / total_gross * 100:.2f}%")
        else:
            print("Slippage as % of gross P&L: N/A")

        # Plot
        fig, ax = plt.subplots(figsize=(14, 6))

        merged['cumulative_net'] = merged['daily_pnl'].cumsum()
        merged['cumulative_gross'] = merged['gross_pnl'].cumsum()
        merged['cumulative_slippage'] = merged['daily_slippage'].cumsum()

        ax.plot(merged['date'], merged['cumulative_gross'], linewidth=2, label='Gross P&L', color='blue', alpha=0.7)
        ax.plot(merged['date'], merged['cumulative_net'], linewidth=2, label='Net P&L', color='green', alpha=0.7)
        ax.fill_between(merged['date'], merged['cumulative_gross'], merged['cumulative_net'],
                        alpha=0.3, color='red', label='Slippage Cost')

        ax.set_title('Cumulative P&L: Gross vs Net (Slippage Impact)', fontsize=14, fontweight='bold')
        ax.set_xlabel('Date')
        ax.set_ylabel('Cumulative P&L ($)')
        ax.legend(loc='upper left')
        ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
        ax.grid(True, alpha=0.3)

        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()
    elif slippage_col:
        print("Snapshots have neither daily_pnl nor nav - cannot build slippage overlay")
    else:
        print("Could not identify slippage column for overlay")
else:
    print("Slippage data not available for overlay")

## P&L by Direction (Long vs Short)

In [None]:
# Split position P&L into long and short books using the sign of each
# position's weight, then show the totals and the cumulative P&L per book.
if df_positions is not None and weight_col and pnl_col:
    # Classify positions by direction. Zero or missing weights carry no
    # directional exposure, so they get their own 'Flat' bucket instead of
    # being silently counted as shorts.
    position_weights = df_positions[weight_col]
    df_positions['direction'] = np.select(
        [position_weights > 0, position_weights < 0],
        ['Long', 'Short'],
        default='Flat',
    )

    # Aggregate by direction
    direction_pnl = df_positions.groupby('direction')[pnl_col].sum()

    print("\nP&L by Direction:")
    print("=" * 60)
    display(direction_pnl.to_frame('Total P&L'))

    # Daily breakdown: one column per direction, 0 on days a direction is absent
    daily_direction = df_positions.groupby(['date', 'direction'])[pnl_col].sum().reset_index()
    pivot_direction = daily_direction.pivot(index='date', columns='direction', values=pnl_col).fillna(0)

    # Cumulative
    cumulative_direction = pivot_direction.cumsum()

    fig, ax = plt.subplots(figsize=(14, 6))

    direction_colors = {'Long': 'green', 'Short': 'red', 'Flat': 'gray'}
    for col in cumulative_direction.columns:
        ax.plot(
            cumulative_direction.index,
            cumulative_direction[col],
            linewidth=2,
            label=col,
            color=direction_colors.get(col, 'gray'),
        )

    ax.set_title('Cumulative P&L by Direction', fontsize=14, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Cumulative P&L ($)')
    ax.legend(loc='upper left')
    ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
    ax.grid(True, alpha=0.3)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()
elif df_positions is not None and weight_col:
    print("P&L column not found - direction analysis requires P&L data")

## P&L Attribution by Signal Horizon (Short / Medium / Long)


In [None]:
# Heuristic attribution of daily position P&L to the short/medium/long SMA
# horizons: each day's P&L for a symbol is split in proportion to the
# magnitude of the weighted, ATR-scaled distance of price from each SMA,
# using the most recent signal row available on or before that day.
if df_positions is not None and df_signals is not None and pnl_col and symbol_col:
    # Build per-signal component scores from SMA distances (mirrors alpha model logic)
    signals = df_signals.copy()

    # Normalize symbol column
    if 'symbol' not in signals.columns:
        if symbol_col in signals.columns:
            signals['symbol'] = signals[symbol_col]
        else:
            print("Signals file missing symbol column; cannot do horizon attribution")
            signals = None

    required_cols = ['price', 'sma_short', 'sma_medium', 'sma_long', 'atr']
    if signals is not None:
        # 'date' is needed for deduplication and the as-of join below, so it
        # is validated together with the numeric inputs.
        missing = [c for c in ['date'] + required_cols if c not in signals.columns]
        if missing:
            print(f"Signals file missing columns: {missing} - cannot do horizon attribution")
            signals = None

    if signals is not None:
        # Ensure numeric values
        for col in required_cols:
            signals[col] = pd.to_numeric(signals[col], errors='coerce')
        signals = signals.dropna(subset=required_cols).copy()

        # Parse dates up front so sorting is chronological rather than
        # lexicographic, and drop rows that cannot take part in the join.
        signals['date'] = pd.to_datetime(signals['date'])
        signals = signals.dropna(subset=['date', 'symbol'])

        # Deduplicate signals per symbol/date. The stable sort keeps file
        # order within a date, so "last" is deterministically the latest row.
        signals = (
            signals.sort_values('date', kind='mergesort')
            .drop_duplicates(subset=['date', 'symbol'], keep='last')
            .copy()
        )

        # Compute SMA distance components (ATR of 0 becomes NaN to avoid inf)
        atr_safe = signals['atr'].replace(0, np.nan)
        signals['dist_short'] = (signals['price'] - signals['sma_short']) / atr_safe
        signals['dist_medium'] = (signals['price'] - signals['sma_medium']) / atr_safe
        signals['dist_long'] = (signals['price'] - signals['sma_long']) / atr_safe

        # Weights from alpha model (update if changed in main.py)
        weights = {'short': 0.5, 'medium': 0.3, 'long': 0.2}
        signals['comp_short'] = weights['short'] * signals['dist_short']
        signals['comp_medium'] = weights['medium'] * signals['dist_medium']
        signals['comp_long'] = weights['long'] * signals['dist_long']
        signals['abs_total'] = signals[['comp_short', 'comp_medium', 'comp_long']].abs().sum(axis=1)

        # Signals with no measurable component cannot be split into shares
        signals = signals[signals['abs_total'] > 0].copy()

        # Daily position P&L by symbol
        position_pnl_daily = df_positions.groupby(['date', symbol_col])[pnl_col].sum().reset_index()
        position_pnl_daily = position_pnl_daily.rename(columns={symbol_col: 'symbol', pnl_col: 'position_pnl'})

        signal_cols = ['comp_short', 'comp_medium', 'comp_long', 'abs_total']

        if signals.empty or position_pnl_daily.empty:
            # Nothing to join; fall through to the "skipped" message below
            merged_horizon = pd.DataFrame()
        else:
            # Defensive datetime conversion (no-op if already datetime)
            pnl_sorted = position_pnl_daily.copy()
            pnl_sorted['date'] = pd.to_datetime(pnl_sorted['date'])

            # merge_asof needs both frames sorted by the 'on' key
            pnl_sorted = pnl_sorted.sort_values('date', kind='mergesort')
            signals_sorted = signals.sort_values('date', kind='mergesort')[['date', 'symbol'] + signal_cols]

            # Attach the most recent signal on or before each P&L date, per
            # symbol. An exact-date merge followed by ffill would drop any
            # signal dated on a day without a position row (for example the
            # day before entry); the backward as-of join keeps it.
            merged_horizon = pd.merge_asof(
                pnl_sorted,
                signals_sorted,
                on='date',
                by='symbol',
                direction='backward',
            ).sort_values(['symbol', 'date'])

            # Keep only P&L rows that have a usable signal attached
            merged_horizon = merged_horizon[
                merged_horizon['abs_total'].notna() & (merged_horizon['abs_total'] > 0)
            ].copy()

        if merged_horizon.empty:
            print("No overlapping signal history with positions; horizon attribution skipped")
        else:
            # Allocate daily P&L proportional to component magnitudes
            merged_horizon['short_share'] = merged_horizon['comp_short'].abs() / merged_horizon['abs_total']
            merged_horizon['medium_share'] = merged_horizon['comp_medium'].abs() / merged_horizon['abs_total']
            merged_horizon['long_share'] = merged_horizon['comp_long'].abs() / merged_horizon['abs_total']

            merged_horizon['short_pnl'] = merged_horizon['position_pnl'] * merged_horizon['short_share']
            merged_horizon['medium_pnl'] = merged_horizon['position_pnl'] * merged_horizon['medium_share']
            merged_horizon['long_pnl'] = merged_horizon['position_pnl'] * merged_horizon['long_share']

            # Daily horizon P&L
            daily_horizon = merged_horizon.groupby('date')[['short_pnl', 'medium_pnl', 'long_pnl']].sum().reset_index()

            # Summary table ('Pct of Attributed' is a fraction, not a percentage)
            totals = daily_horizon[['short_pnl', 'medium_pnl', 'long_pnl']].sum()
            totals = totals.rename({'short_pnl': 'Short', 'medium_pnl': 'Medium', 'long_pnl': 'Long'})
            totals_df = totals.to_frame('Total P&L')
            total_attributed = totals_df['Total P&L'].sum()
            if total_attributed != 0:
                totals_df['Pct of Attributed'] = totals_df['Total P&L'] / total_attributed
            else:
                totals_df['Pct of Attributed'] = 0.0

            print()
            print("Horizon Attribution Summary (heuristic allocation):")
            print("=" * 60)
            display(totals_df)

            # Coverage vs total position P&L
            covered_pnl = merged_horizon['position_pnl'].sum()
            total_position_pnl = position_pnl_daily['position_pnl'].sum()
            if total_position_pnl != 0:
                coverage_pct = covered_pnl / total_position_pnl * 100
                print(f"Coverage of position P&L by signals: {coverage_pct:.2f}%")
            else:
                print("Coverage of position P&L by signals: N/A (zero total P&L)")

            # Plot 1: Cumulative P&L by horizon
            cumulative_horizon = daily_horizon.set_index('date').cumsum()
            fig, ax = plt.subplots(figsize=(14, 6))
            ax.plot(cumulative_horizon.index, cumulative_horizon['short_pnl'], label='Short', color='#1f77b4', linewidth=2)
            ax.plot(cumulative_horizon.index, cumulative_horizon['medium_pnl'], label='Medium', color='#ff7f0e', linewidth=2)
            ax.plot(cumulative_horizon.index, cumulative_horizon['long_pnl'], label='Long', color='#2ca02c', linewidth=2)
            ax.set_title('Cumulative P&L by Signal Horizon', fontsize=14, fontweight='bold')
            ax.set_xlabel('Date')
            ax.set_ylabel('Cumulative P&L ($)')
            ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
            ax.grid(True, alpha=0.3)
            ax.legend(loc='upper left')
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()
            plt.show()

            # Plot 2: Monthly P&L stacked by horizon
            monthly_horizon = daily_horizon.copy()
            monthly_horizon['year_month'] = monthly_horizon['date'].dt.to_period('M')
            monthly_horizon = monthly_horizon.groupby('year_month')[['short_pnl', 'medium_pnl', 'long_pnl']].sum()

            fig, ax = plt.subplots(figsize=(14, 6))
            monthly_horizon.plot(kind='bar', stacked=True, ax=ax, color=['#1f77b4', '#ff7f0e', '#2ca02c'])
            ax.set_title('Monthly P&L by Signal Horizon (Stacked)', fontsize=14, fontweight='bold')
            ax.set_xlabel('Month')
            ax.set_ylabel('P&L ($)')
            ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
            ax.grid(True, alpha=0.3, axis='y')
            ax.set_xticklabels([str(p) for p in monthly_horizon.index], rotation=45, ha='right')
            ax.legend(loc='upper left')
            plt.tight_layout()
            plt.show()
else:
    print("Signal horizon attribution skipped (missing positions/signals or P&L column)")


## Monthly P&L Attribution

In [None]:
# Aggregate daily P&L from the snapshots into calendar months, chart it as a
# green/red bar per month, and print best/worst/average month statistics.
if df_snapshots is not None:
    # Add month column (written back onto df_snapshots, as before)
    df_snapshots['year_month'] = df_snapshots['date'].dt.to_period('M')

    # Monthly P&L
    monthly_pnl = df_snapshots.groupby('year_month')['daily_pnl'].sum()

    if monthly_pnl.empty:
        # idxmax()/idxmin() raise ValueError on an empty series, and an empty
        # bar chart carries no information, so report and stop here.
        print("No snapshot rows available - monthly P&L skipped")
    else:
        fig, ax = plt.subplots(figsize=(14, 6))

        bar_positions = range(len(monthly_pnl))
        colors = ['green' if x >= 0 else 'red' for x in monthly_pnl.values]
        ax.bar(bar_positions, monthly_pnl.values, color=colors, alpha=0.7)
        ax.set_xticks(bar_positions)
        ax.set_xticklabels([str(p) for p in monthly_pnl.index], rotation=45, ha='right')
        ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
        ax.set_title('Monthly P&L', fontsize=14, fontweight='bold')
        ax.set_xlabel('Month')
        ax.set_ylabel('P&L ($)')
        ax.grid(True, alpha=0.3, axis='y')

        plt.tight_layout()
        plt.show()

        print("\nMonthly P&L Statistics:")
        print("=" * 60)
        print(f"Best month: ${monthly_pnl.max():,.2f} ({monthly_pnl.idxmax()})")
        print(f"Worst month: ${monthly_pnl.min():,.2f} ({monthly_pnl.idxmin()})")
        print(f"Average month: ${monthly_pnl.mean():,.2f}")
        print(f"Positive months: {(monthly_pnl > 0).sum()} / {len(monthly_pnl)}")

## Summary

In [None]:
# Final text summary: headline P&L statistics, per-symbol attribution and
# slippage impact, each printed only if the cell that produces it has run.
print("\n" + "=" * 80)
print("P&L ATTRIBUTION SUMMARY")
print("=" * 80)

if df_snapshots is not None:
    total_pnl = df_snapshots['daily_pnl'].sum()
    print(f"\nTotal P&L: ${total_pnl:,.2f}")
    print(f"Trading days: {len(df_snapshots)}")
    print(f"Average daily P&L: ${df_snapshots['daily_pnl'].mean():,.2f}")
    print(f"Daily P&L std: ${df_snapshots['daily_pnl'].std():,.2f}")

# NOTE(review): first/last entries are reported as top/bottom contributor,
# which presumes symbol_pnl is sorted descending by P&L - confirm upstream.
if 'symbol_pnl' in dir() and len(symbol_pnl) > 0:
    print("\nSymbol Attribution:")
    print(f"  Symbols traded: {len(symbol_pnl)}")
    print(f"  Profitable symbols: {(symbol_pnl > 0).sum()}")
    print(f"  Top contributor: {symbol_pnl.index[0]} (${symbol_pnl.iloc[0]:,.2f})")
    print(f"  Bottom contributor: {symbol_pnl.index[-1]} (${symbol_pnl.iloc[-1]:,.2f})")

# `merged` is only assigned when the slippage overlay found a usable slippage
# column, so check that the name exists before touching it (same guard style
# as symbol_pnl above) instead of risking a NameError.
if (
    df_slippage is not None
    and 'merged' in dir()
    and merged is not None
    and 'daily_slippage' in merged.columns
    and 'gross_pnl' in merged.columns
):
    total_slippage = merged['daily_slippage'].sum()
    gross_total = merged['gross_pnl'].sum()
    print("\nSlippage Impact:")
    print(f"  Total slippage: ${total_slippage:,.2f}")
    # Keep the label in both branches; only the value falls back to N/A
    if gross_total != 0:
        print(f"  Slippage % of gross: {total_slippage / gross_total * 100:.2f}%")
    else:
        print("  Slippage % of gross: N/A")

print("\n" + "=" * 80)