# Slippage Analysis

Slippage diagnostics for the WolfpackTrend strategy.

**Data Sources:**
- `{TEAM_ID}/slippage.csv` - Per-order slippage (expected vs fill price)
- `{TEAM_ID}/daily_snapshots.csv` - Daily portfolio metrics

**Analysis:**
- Slippage in $ and basis points
- Distribution by symbol and direction
- Daily aggregates
- Relationship to gross exposure and volatility

**Prerequisites:** Run the WolfpackTrend backtest first to generate ObjectStore data.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display

pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

from QuantConnect import *
from QuantConnect.Research import QuantBook
from config import TEAM_ID

qb = QuantBook()
print("QuantBook initialized")


def read_csv_from_store(key):
    """Load the CSV stored under ``key`` in the QuantBook ObjectStore.

    Returns the parsed ``pandas.DataFrame``. Returns ``None`` (after printing
    a diagnostic) when the stored content is empty/falsy or when reading or
    parsing raises any exception, so callers must check for ``None``.
    """
    try:
        raw_text = qb.ObjectStore.Read(key)
        if raw_text:
            # Parsing stays inside the try so a malformed CSV is reported, not raised.
            return pd.read_csv(StringIO(raw_text))
        print(f"Empty ObjectStore key: {key}")
    except Exception as exc:
        print(f"Error reading {key}: {exc}")
    return None

## Load Data

In [None]:
# Pull both backtest artifacts from the ObjectStore; each is None if unavailable.
df_slippage = read_csv_from_store(f"{TEAM_ID}/slippage.csv")
df_snapshots = read_csv_from_store(f"{TEAM_ID}/daily_snapshots.csv")

# Per-order slippage is mandatory: stop here rather than fail in a later cell.
if df_slippage is None:
    raise ValueError("slippage.csv is required for this notebook. Run the backtest first.")

df_slippage["date"] = pd.to_datetime(df_slippage["date"])
print(f"Slippage records: {len(df_slippage)}")
print(f"Columns: {list(df_slippage.columns)}")

# Daily snapshots are optional; cells that use them check for None.
if df_snapshots is not None:
    df_snapshots["date"] = pd.to_datetime(df_snapshots["date"])
    print(f"Daily snapshots: {len(df_snapshots)}")

## Preview Slippage Data

In [None]:
# Show the first ten rows, followed by the dtype pandas inferred for each column.
print("\nSlippage Data Sample:", "=" * 80, sep="\n")
display(df_slippage.head(10))

print("\nData Types:", df_slippage.dtypes, sep="\n")

## Compute Slippage Metrics

In [None]:
# Map the CSV's columns onto the roles this notebook needs (case-insensitive).
# For the price and quantity roles the last matching column wins. A
# 'slippage_dollars' column always takes the slippage role, while a plain
# 'slippage' column is used only if nothing has claimed that role yet.
expected_col = fill_col = qty_col = slippage_col = None

for column_name in df_slippage.columns:
    lowered = column_name.lower()
    has_price = 'price' in lowered
    if lowered == 'slippage_dollars' or (lowered == 'slippage' and slippage_col is None):
        slippage_col = column_name
    elif has_price and 'expected' in lowered:
        expected_col = column_name
    elif has_price and 'fill' in lowered:
        fill_col = column_name
    elif lowered in ('quantity', 'qty', 'shares'):
        qty_col = column_name

for role_label, matched in (
    ("Expected price", expected_col),
    ("Fill price", fill_col),
    ("Quantity", qty_col),
    ("Slippage", slippage_col),
):
    print(f"{role_label} column: {matched}")

# Slippage in dollars: trust a logged slippage column when present, otherwise
# derive it from the fill/expected price gap (scaled by quantity if available).
# NOTE(review): with a signed quantity (negative for sells) the derived value
# is positive for adverse fills on both sides - confirm this matches the sign
# convention of any slippage column logged by the backtest.
if slippage_col:
    df_slippage['slippage_dollars'] = df_slippage[slippage_col]
elif not (expected_col and fill_col):
    print("\nCould not compute slippage_dollars (missing slippage or price columns)")
else:
    price_gap = df_slippage[fill_col] - df_slippage[expected_col]
    if qty_col:
        df_slippage['slippage_dollars'] = price_gap * df_slippage[qty_col]
        print("\nComputed slippage_dollars = (fill_price - expected_price) * quantity")
    else:
        df_slippage['slippage_dollars'] = price_gap
        print("\nComputed slippage_dollars = fill_price - expected_price (quantity not found)")

# Slippage in basis points of traded notional. The expected price is the
# preferred reference; the fill price is the fallback.
have_dollars = 'slippage_dollars' in df_slippage.columns
price_col = expected_col or fill_col

if not (have_dollars and price_col):
    print("Could not compute slippage_bps (missing slippage or price columns)")
elif not qty_col:
    print("Could not compute slippage_bps (quantity column not found)")
else:
    # Zero notional becomes NaN so the division yields NaN rather than +/-inf.
    notional = (df_slippage[price_col].abs() * df_slippage[qty_col].abs()).replace(0, np.nan)
    df_slippage['slippage_bps'] = (df_slippage['slippage_dollars'] / notional) * 10000
    base = 'expected_price' if expected_col else 'fill_price'
    print(f"Computed slippage_bps = slippage_dollars / ({base} * abs(quantity)) * 10000")


## Overall Slippage Statistics

In [None]:
# Summary statistics over all orders, printed for whichever metrics exist.
print("\nOverall Slippage Statistics:")
print("=" * 80)

if 'slippage_dollars' in df_slippage.columns:
    dollars = df_slippage['slippage_dollars']
    print("\nSlippage in Dollars:")
    print(f"  Total: ${dollars.sum():,.2f}")
    # Each statistic is evaluated right before it is printed.
    for stat_label, stat_method in (("Mean", "mean"), ("Std", "std"), ("Min", "min"), ("Max", "max")):
        print(f"  {stat_label}: ${getattr(dollars, stat_method)():.4f}")

if 'slippage_bps' in df_slippage.columns:
    bps = df_slippage['slippage_bps']
    print("\nSlippage in Basis Points:")
    for stat_label, stat_method in (
        ("Mean", "mean"),
        ("Std", "std"),
        ("Median", "median"),
        ("Min", "min"),
        ("Max", "max"),
    ):
        print(f"  {stat_label}: {getattr(bps, stat_method)():.2f} bps")

## Slippage Distribution

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One histogram per metric: (column, bar color, title, x label, legend text for the mean).
panels = (
    (
        'slippage_dollars',
        'steelblue',
        'Slippage Distribution (Dollars)',
        'Slippage ($)',
        lambda avg: f"Mean: ${avg:.4f}",
    ),
    (
        'slippage_bps',
        'coral',
        'Slippage Distribution (Basis Points)',
        'Slippage (bps)',
        lambda avg: f"Mean: {avg:.2f} bps",
    ),
)

for ax, (column, bar_color, title, x_label, mean_label) in zip(axes, panels):
    # A panel is left blank when its metric was never computed.
    if column not in df_slippage.columns:
        continue
    values = df_slippage[column]
    average = values.mean()
    ax.hist(values, bins=50, color=bar_color, alpha=0.7, edgecolor='black')
    # Red marks zero slippage; green marks the sample mean.
    ax.axvline(x=0, color='red', linestyle='--', linewidth=2)
    ax.axvline(x=average, color='green', linestyle='--', linewidth=2, label=mean_label(average))
    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.set_xlabel(x_label)
    ax.set_ylabel('Frequency')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Slippage by Symbol

In [None]:
# Locate the instrument identifier column (first case-insensitive match wins).
symbol_col = next(
    (name for name in df_slippage.columns if name.lower() in ('symbol', 'ticker', 'underlying')),
    None,
)

# Report the actual reason the analysis is skipped instead of always blaming
# the symbol column.
if symbol_col is None:
    print("No symbol column found for per-symbol analysis")
elif 'slippage_dollars' not in df_slippage.columns:
    print("slippage_dollars not available; run the 'Compute Slippage Metrics' cell before per-symbol analysis")
else:
    # Aggregate by symbol: total, per-order average, and order count.
    symbol_slippage = df_slippage.groupby(symbol_col).agg({
        'slippage_dollars': ['sum', 'mean', 'count']
    })
    symbol_slippage.columns = ['total_slippage', 'avg_slippage', 'num_orders']
    # Ascending sort, so head(10) below is the ten LOWEST totals.
    symbol_slippage = symbol_slippage.sort_values('total_slippage')

    print("\nSlippage by Symbol (10 lowest by total slippage):")
    print("=" * 80)
    display(symbol_slippage.head(10))

    # NOTE(review): the panels below label the most negative totals as "worst".
    # When slippage_dollars is derived as (fill - expected) * quantity with a
    # signed quantity, adverse fills are POSITIVE, which would invert these
    # labels - confirm the sign convention of the logged slippage column.
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Worst slippage (most negative)
    worst = symbol_slippage.nsmallest(10, 'total_slippage')
    axes[0].barh(worst.index, worst['total_slippage'], color='red', alpha=0.7)
    axes[0].set_title('Worst Slippage by Symbol (Most Negative)', fontsize=12)
    axes[0].set_xlabel('Total Slippage ($)')
    axes[0].grid(True, alpha=0.3, axis='x')

    # Best slippage (most positive / least negative)
    best = symbol_slippage.nlargest(10, 'total_slippage')
    axes[1].barh(best.index, best['total_slippage'], color='green', alpha=0.7)
    axes[1].set_title('Best Slippage by Symbol (Most Positive)', fontsize=12)
    axes[1].set_xlabel('Total Slippage ($)')
    axes[1].grid(True, alpha=0.3, axis='x')

    plt.tight_layout()
    plt.show()

## Slippage by Direction

In [None]:
# Find an explicit direction column (buy/sell); first case-insensitive match wins.
direction_col = next(
    (name for name in df_slippage.columns
     if name.lower() in ('direction', 'side', 'order_direction', 'action')),
    None,
)

# If no direction column exists, infer it from the sign of the quantity.
if direction_col is None:
    quantity_source = next(
        (name for name in df_slippage.columns if name.lower() in ('quantity', 'qty', 'shares')),
        None,
    )
    if quantity_source is not None:
        signed_qty = df_slippage[quantity_source]
        # Zero or missing quantities carry no direction; leave them unlabeled
        # (groupby/boxplot skip NaN keys) rather than counting them as sells.
        inferred = pd.Series(np.nan, index=df_slippage.index, dtype=object)
        inferred[signed_qty > 0] = 'BUY'
        inferred[signed_qty < 0] = 'SELL'
        df_slippage['direction'] = inferred
        direction_col = 'direction'
        print(f"Inferred direction from {quantity_source}")

# Both metrics are aggregated below, so require both and say which is missing.
missing_metrics = [
    metric for metric in ('slippage_dollars', 'slippage_bps')
    if metric not in df_slippage.columns
]

if direction_col is None:
    print("No direction column found for direction analysis")
elif missing_metrics:
    print(f"Cannot run direction analysis; missing metric columns: {missing_metrics}")
else:
    print(f"\nSlippage by Direction ({direction_col}):")
    print("=" * 80)

    direction_stats = df_slippage.groupby(direction_col).agg({
        'slippage_dollars': ['sum', 'mean', 'count'],
        'slippage_bps': ['mean', 'median']
    })
    direction_stats.columns = ['total_$', 'avg_$', 'count', 'avg_bps', 'median_bps']
    display(direction_stats)

    # Box plot of per-order slippage (bps) for each direction.
    fig, ax = plt.subplots(figsize=(10, 6))
    df_slippage.boxplot(column='slippage_bps', by=direction_col, ax=ax)
    ax.set_title('Slippage Distribution by Direction', fontsize=14, fontweight='bold')
    ax.set_xlabel('Direction')
    ax.set_ylabel('Slippage (bps)')
    plt.suptitle('')  # Remove the automatic "grouped by" title pandas adds
    ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()

## Daily Slippage Aggregates

In [None]:
# Build daily_slippage (one row per distinct 'date' value), which the
# exposure/volatility cells merge against, then chart daily and cumulative totals.
if 'slippage_dollars' in df_slippage.columns:
    # NOTE(review): grouping is on the exact 'date' value; if that column carries
    # intraday timestamps the groups are per-timestamp, not per-day - confirm.
    daily_slippage = df_slippage.groupby('date').agg({
        'slippage_dollars': ['sum', 'mean', 'count']
    })
    daily_slippage.columns = ['total_slippage', 'avg_slippage', 'num_orders']
    daily_slippage = daily_slippage.reset_index()
    # groupby returns dates in sorted order, so cumsum is a running total over time.
    daily_slippage['cumulative_slippage'] = daily_slippage['total_slippage'].cumsum()

    if daily_slippage.empty:
        # With no rows there is nothing to plot, and iloc[-1] would raise IndexError.
        print("No slippage records available for daily aggregation")
    else:
        fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

        # Daily total slippage
        axes[0].bar(daily_slippage['date'], daily_slippage['total_slippage'], color='steelblue', alpha=0.7)
        axes[0].axhline(y=0, color='red', linestyle='--', alpha=0.5)
        axes[0].set_title('Daily Total Slippage', fontsize=14, fontweight='bold')
        axes[0].set_ylabel('Slippage ($)')
        axes[0].grid(True, alpha=0.3)

        # Cumulative slippage
        axes[1].plot(daily_slippage['date'], daily_slippage['cumulative_slippage'], linewidth=2, color='steelblue')
        axes[1].fill_between(daily_slippage['date'], 0, daily_slippage['cumulative_slippage'], alpha=0.3)
        axes[1].axhline(y=0, color='red', linestyle='--', alpha=0.5)
        axes[1].set_title('Cumulative Slippage Over Time', fontsize=14, fontweight='bold')
        axes[1].set_xlabel('Date')
        axes[1].set_ylabel('Cumulative Slippage ($)')
        axes[1].grid(True, alpha=0.3)

        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

        print(f"\nTotal cumulative slippage: ${daily_slippage['cumulative_slippage'].iloc[-1]:,.2f}")
else:
    print("slippage_dollars not available; daily aggregates were not computed")

## Slippage vs Gross Exposure

In [None]:
# Relate each day's total slippage to the gross exposure recorded in the daily
# snapshot for the same date. Every precondition is checked separately so the
# message printed names the real reason the analysis was skipped.
exposure_cols = ['date', 'gross_exposure', 'nav']

if 'daily_slippage' not in globals():
    # daily_slippage only exists if the daily aggregation cell ran successfully.
    print("daily_slippage is not defined; run the 'Daily Slippage Aggregates' cell first")
elif df_snapshots is None:
    print("Daily snapshots not loaded; skipping gross exposure analysis")
elif 'gross_exposure' not in df_snapshots.columns:
    print("No gross_exposure column found in daily snapshots")
elif any(name not in df_snapshots.columns for name in exposure_cols):
    missing_cols = [name for name in exposure_cols if name not in df_snapshots.columns]
    print(f"Daily snapshots missing required columns: {missing_cols}")
else:
    # Inner join keeps only dates present in both daily slippage and snapshots.
    merged = daily_slippage.merge(df_snapshots[exposure_cols], on='date', how='inner')

    if merged.empty:
        print("No overlapping dates between daily slippage and daily snapshots")
    else:
        # Slippage in bps of NAV; a zero NAV becomes NaN instead of producing +/-inf.
        merged['slippage_pct_nav'] = (merged['total_slippage'] / merged['nav'].replace(0, np.nan)) * 10000

        fig, axes = plt.subplots(1, 2, figsize=(14, 5))

        # Scatter: slippage ($) vs gross exposure (x scaled by 100 for the % axis)
        axes[0].scatter(merged['gross_exposure'] * 100, merged['total_slippage'], alpha=0.6, color='steelblue')
        axes[0].set_title('Daily Slippage vs Gross Exposure', fontsize=12, fontweight='bold')
        axes[0].set_xlabel('Gross Exposure (%)')
        axes[0].set_ylabel('Daily Slippage ($)')
        axes[0].axhline(y=0, color='red', linestyle='--', alpha=0.5)
        axes[0].grid(True, alpha=0.3)

        # Scatter: slippage (bps of NAV) vs gross exposure
        axes[1].scatter(merged['gross_exposure'] * 100, merged['slippage_pct_nav'], alpha=0.6, color='coral')
        axes[1].set_title('Daily Slippage (bps of NAV) vs Gross Exposure', fontsize=12, fontweight='bold')
        axes[1].set_xlabel('Gross Exposure (%)')
        axes[1].set_ylabel('Slippage (bps of NAV)')
        axes[1].axhline(y=0, color='red', linestyle='--', alpha=0.5)
        axes[1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Pearson correlation between exposure and dollar slippage (NaN with < 2 rows).
        corr = merged['gross_exposure'].corr(merged['total_slippage'])
        print(f"\nCorrelation between gross exposure and slippage: {corr:.4f}")

## Slippage vs Estimated Volatility

In [None]:
# Relate each day's total slippage to the estimated volatility recorded in the
# daily snapshot for the same date. Every precondition is checked separately so
# the message printed names the real reason the analysis was skipped.
vol_cols = ['date', 'estimated_vol', 'nav']

if 'daily_slippage' not in globals():
    # daily_slippage only exists if the daily aggregation cell ran successfully.
    print("daily_slippage is not defined; run the 'Daily Slippage Aggregates' cell first")
elif df_snapshots is None:
    print("Daily snapshots not loaded; skipping volatility analysis")
elif 'estimated_vol' not in df_snapshots.columns:
    print("No estimated_vol column found in daily snapshots")
elif any(name not in df_snapshots.columns for name in vol_cols):
    missing_cols = [name for name in vol_cols if name not in df_snapshots.columns]
    print(f"Daily snapshots missing required columns: {missing_cols}")
else:
    # Inner join keeps only dates present in both daily slippage and snapshots.
    merged = daily_slippage.merge(df_snapshots[vol_cols], on='date', how='inner')

    if merged.empty:
        print("No overlapping dates between daily slippage and daily snapshots")
    else:
        # Slippage in bps of NAV; a zero NAV becomes NaN instead of producing +/-inf.
        merged['slippage_pct_nav'] = (merged['total_slippage'] / merged['nav'].replace(0, np.nan)) * 10000

        fig, axes = plt.subplots(1, 2, figsize=(14, 5))

        # Scatter: slippage ($) vs volatility (x scaled by 100 for the % axis)
        axes[0].scatter(merged['estimated_vol'] * 100, merged['total_slippage'], alpha=0.6, color='steelblue')
        axes[0].set_title('Daily Slippage vs Estimated Volatility', fontsize=12, fontweight='bold')
        axes[0].set_xlabel('Estimated Volatility (%)')
        axes[0].set_ylabel('Daily Slippage ($)')
        axes[0].axhline(y=0, color='red', linestyle='--', alpha=0.5)
        axes[0].grid(True, alpha=0.3)

        # Scatter: slippage (bps of NAV) vs volatility
        axes[1].scatter(merged['estimated_vol'] * 100, merged['slippage_pct_nav'], alpha=0.6, color='coral')
        axes[1].set_title('Daily Slippage (bps of NAV) vs Estimated Volatility', fontsize=12, fontweight='bold')
        axes[1].set_xlabel('Estimated Volatility (%)')
        axes[1].set_ylabel('Slippage (bps of NAV)')
        axes[1].axhline(y=0, color='red', linestyle='--', alpha=0.5)
        axes[1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Pearson correlation between volatility and dollar slippage (NaN with < 2 rows).
        corr = merged['estimated_vol'].corr(merged['total_slippage'])
        print(f"\nCorrelation between estimated volatility and slippage: {corr:.4f}")

## Summary

In [None]:
# Final roll-up: order count, slippage totals, and slippage relative to NAV.
print("\n" + "=" * 80)
print("SLIPPAGE ANALYSIS SUMMARY")
print("=" * 80)

print(f"\nTotal orders analyzed: {len(df_slippage)}")

# Stays None when dollar slippage was never computed, so the NAV section can
# report "n/a" instead of a misleading 0%.
total_slippage = None
if 'slippage_dollars' in df_slippage.columns:
    total_slippage = df_slippage['slippage_dollars'].sum()
    print(f"Total slippage: ${total_slippage:,.2f}")
    print(f"Average slippage per order: ${df_slippage['slippage_dollars'].mean():.4f}")

if 'slippage_bps' in df_slippage.columns:
    print(f"Average slippage (bps): {df_slippage['slippage_bps'].mean():.2f}")
    print(f"Median slippage (bps): {df_slippage['slippage_bps'].median():.2f}")

if df_snapshots is not None and 'nav' in df_snapshots.columns:
    if df_snapshots.empty:
        # iloc[0] / iloc[-1] would raise IndexError on an empty frame.
        print("\nDaily snapshots are empty; cannot compute NAV-relative figures")
    else:
        # First/last rows are taken as start/end NAV.
        # NOTE(review): assumes snapshots are stored in chronological order - confirm.
        final_nav = df_snapshots['nav'].iloc[-1]
        initial_nav = df_snapshots['nav'].iloc[0]
        total_return = (final_nav / initial_nav - 1) * 100
        if total_slippage is not None:
            slippage_drag = (total_slippage / initial_nav) * 100
            print(f"\nSlippage as % of starting NAV: {slippage_drag:.4f}%")
        else:
            print("\nSlippage as % of starting NAV: n/a (slippage_dollars not available)")
        print(f"Total strategy return: {total_return:.2f}%")

print("\n" + "=" * 80)