<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [6]</a>'.</span>

In [1]:
# Parameters
run_dir = "/Users/daws/ADMF-PC/config/bollinger/results/20250627_094038"
config_name = "bollinger"
symbols = ["SPY"]
timeframe = "5m"
min_strategies_to_analyze = 20
sharpe_threshold = 1.0
correlation_threshold = 0.7
top_n_strategies = 10
ensemble_size = 5
calculate_all_performance = True
performance_limit = 100


# Trade & Risk Analysis Notebook

This notebook analyzes trading performance through orders, fills, and position events,
with special focus on risk management exits (stop loss, take profit, trailing stop).

In [2]:
import pandas as pd
import numpy as np
import json
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Configure display
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_rows', 100)

# Style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

## 1. Load Trace Data

In [3]:
# Set the results directory
results_dir = Path('.')  # Assumes notebook is run from results directory
traces_dir = results_dir / 'traces'

# Load all trace files
def load_trace_files(traces_dir):
    """Load all trace files and return as dict of DataFrames."""
    traces = {}
    
    # Strategy signals
    signals_path = list(traces_dir.rglob('signals/*/*.parquet'))
    if signals_path:
        traces['signals'] = pd.read_parquet(signals_path[0])
    
    # Portfolio orders
    orders_path = traces_dir / 'portfolio' / 'orders' / 'portfolio_orders.parquet'
    if orders_path.exists():
        traces['orders'] = pd.read_parquet(orders_path)
    
    # Execution fills
    fills_path = traces_dir / 'execution' / 'fills' / 'execution_fills.parquet'
    if fills_path.exists():
        traces['fills'] = pd.read_parquet(fills_path)
    
    # Position events
    pos_open_path = traces_dir / 'portfolio' / 'positions_open' / 'position_open.parquet'
    if pos_open_path.exists():
        traces['position_open'] = pd.read_parquet(pos_open_path)
    
    pos_close_path = traces_dir / 'portfolio' / 'positions_close' / 'position_close.parquet'
    if pos_close_path.exists():
        traces['position_close'] = pd.read_parquet(pos_close_path)
    
    return traces

traces = load_trace_files(traces_dir)
print(f"Loaded trace files: {list(traces.keys())}")

Loaded trace files: []


## 2. Parse Metadata and Build Trade Records

In [4]:
def parse_metadata(df, col='metadata'):
    """Parse JSON metadata column into separate columns."""
    if col not in df.columns or len(df) == 0:
        return df
    
    # Parse JSON metadata
    metadata_list = []
    for idx, row in df.iterrows():
        try:
            metadata = json.loads(row[col]) if row[col] else {}
            metadata_list.append(metadata)
        except:
            metadata_list.append({})
    
    # Create DataFrame from metadata
    metadata_df = pd.DataFrame(metadata_list)
    
    # Combine with original, avoiding duplicate columns
    for col in metadata_df.columns:
        if col not in df.columns:
            df[col] = metadata_df[col]
    
    return df

# Parse metadata for all traces
for key in ['orders', 'fills', 'position_open', 'position_close']:
    if key in traces:
        traces[key] = parse_metadata(traces[key])
        print(f"Parsed {key}: {len(traces[key])} records")

## 3. Reconstruct Complete Trades

In [5]:
def reconstruct_trades(traces):
    """Reconstruct complete trades from orders, fills, and position events."""
    trades = []
    
    # Match orders with fills
    if 'orders' in traces and 'fills' in traces:
        orders_df = traces['orders'].copy()
        fills_df = traces['fills'].copy()
        
        # Add order index for matching
        orders_df['order_idx'] = range(len(orders_df))
        fills_df['fill_idx'] = range(len(fills_df))
        
        # Simple matching by bar index (assumes 1:1 order:fill)
        for i, (_, order) in enumerate(orders_df.iterrows()):
            if i < len(fills_df):
                fill = fills_df.iloc[i]
                
                trade = {
                    'order_idx': order['idx'],
                    'fill_idx': fill['idx'],
                    'symbol': order.get('symbol', 'UNKNOWN'),
                    'side': order.get('side', 'UNKNOWN'),
                    'quantity': order.get('quantity', 0),
                    'order_price': order.get('price', 0),
                    'fill_price': fill.get('price', 0),
                    'order_time': order['ts'],
                    'fill_time': fill['ts'],
                    'strategy_id': order.get('strategy_id', 'unknown')
                }
                trades.append(trade)
    
    trades_df = pd.DataFrame(trades)
    
    # Add position events if available
    if 'position_open' in traces and 'position_close' in traces:
        # Match trades with position events
        # This is simplified - in reality would need more sophisticated matching
        pos_open = traces['position_open']
        pos_close = traces['position_close']
        
        if len(pos_open) > 0 and len(pos_close) > 0:
            # Add exit information to trades
            for i, close_event in pos_close.iterrows():
                exit_type = close_event.get('exit_type', 'unknown')
                exit_reason = close_event.get('exit_reason', 'unknown')
                if i < len(trades_df):
                    trades_df.loc[trades_df.index[i], 'exit_type'] = exit_type
                    trades_df.loc[trades_df.index[i], 'exit_reason'] = exit_reason
    
    return trades_df

trades_df = reconstruct_trades(traces)
print(f"Reconstructed {len(trades_df)} trades")

Reconstructed 0 trades


## 4. Analyze Risk Management Exits

<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [6]:
# Analyze position close events for risk exits
if 'position_close' in traces:
    pos_close = traces['position_close']
    
    print("=== Risk Management Exit Analysis ===")
    print(f"Total positions closed: {len(pos_close)}")
    
    # Count exit types
    if 'exit_type' in pos_close.columns:
        exit_counts = pos_close['exit_type'].value_counts()
        print("
Exit types:")
        for exit_type, count in exit_counts.items():
            print(f"  {exit_type}: {count} ({count/len(pos_close)*100:.1f}%)")
    
    # Analyze exit reasons
    if 'exit_reason' in pos_close.columns:
        print("
Exit reasons:")
        for i, row in pos_close.iterrows():
            print(f"  Trade {i+1}: {row.get('exit_type', 'unknown')} - {row.get('exit_reason', 'unknown')}")
            if hasattr(row, 'realized_pnl'):
                print(f"    PnL: ${row['realized_pnl']:.2f}")

SyntaxError: unterminated string literal (detected at line 11) (3970604835.py, line 11)

## 5. Signal vs Position Analysis

In [None]:
# Analyze signal persistence after risk exits
if 'signals' in traces and 'position_close' in traces:
    signals_df = traces['signals']
    pos_close = traces['position_close']
    
    print("=== Signal Persistence After Risk Exits ===")
    
    # For each position close due to risk
    risk_closes = pos_close[pos_close.get('exit_type', '').isin(['stop_loss', 'trailing_stop'])] if 'exit_type' in pos_close.columns else pd.DataFrame()
    
    if len(risk_closes) > 0:
        for _, close_event in risk_closes.iterrows():
            close_bar = close_event['idx']
            
            # Check signal value at close and next few bars
            next_signals = signals_df[signals_df['idx'].between(close_bar, close_bar + 10)]
            
            if len(next_signals) > 0:
                print(f"
Risk exit at bar {close_bar}:")
                print(f"  Exit type: {close_event.get('exit_type', 'unknown')}")
                print(f"  Signal values after exit:")
                for _, sig in next_signals.iterrows():
                    print(f"    Bar {sig['idx']}: signal = {sig['val']}")

## 6. Performance Metrics by Exit Type

In [None]:
# Calculate performance metrics grouped by exit type
if 'position_close' in traces and 'realized_pnl' in traces['position_close'].columns:
    pos_close = traces['position_close']
    
    # Group by exit type
    if 'exit_type' in pos_close.columns:
        metrics_by_exit = pos_close.groupby('exit_type').agg({
            'realized_pnl': ['count', 'sum', 'mean', 'std'],
            'idx': ['min', 'max']  # First and last bar
        }).round(2)
        
        print("=== Performance by Exit Type ===")
        print(metrics_by_exit)
        
        # Visualize
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
        
        # Count by exit type
        exit_counts = pos_close['exit_type'].value_counts()
        exit_counts.plot(kind='bar', ax=ax1, title='Number of Exits by Type')
        ax1.set_xlabel('Exit Type')
        ax1.set_ylabel('Count')
        
        # PnL by exit type
        pos_close.boxplot(column='realized_pnl', by='exit_type', ax=ax2)
        ax2.set_title('PnL Distribution by Exit Type')
        ax2.set_xlabel('Exit Type')
        ax2.set_ylabel('Realized PnL ($)')
        
        plt.tight_layout()
        plt.show()

## 7. Trade Duration Analysis

In [None]:
# Analyze trade durations
if 'position_open' in traces and 'position_close' in traces:
    pos_open = traces['position_open']
    pos_close = traces['position_close']
    
    if len(pos_open) > 0 and len(pos_close) > 0:
        # Calculate trade durations (simplified - assumes matching order)
        durations = []
        for i in range(min(len(pos_open), len(pos_close))):
            open_bar = pos_open.iloc[i]['idx']
            close_bar = pos_close.iloc[i]['idx']
            duration = close_bar - open_bar
            exit_type = pos_close.iloc[i].get('exit_type', 'unknown')
            
            durations.append({
                'duration_bars': duration,
                'exit_type': exit_type,
                'pnl': pos_close.iloc[i].get('realized_pnl', 0)
            })
        
        duration_df = pd.DataFrame(durations)
        
        print("=== Trade Duration Analysis ===")
        print(f"Average duration: {duration_df['duration_bars'].mean():.1f} bars")
        print(f"Median duration: {duration_df['duration_bars'].median():.1f} bars")
        
        # Duration by exit type
        if 'exit_type' in duration_df.columns:
            print("
Average duration by exit type:")
            for exit_type, group in duration_df.groupby('exit_type'):
                print(f"  {exit_type}: {group['duration_bars'].mean():.1f} bars")

## 8. Order Flow Analysis

In [None]:
# Analyze order patterns
if 'orders' in traces:
    orders_df = traces['orders']
    
    print("=== Order Flow Analysis ===")
    print(f"Total orders: {len(orders_df)}")
    
    # Order frequency
    if len(orders_df) > 1:
        order_gaps = orders_df['idx'].diff().dropna()
        print(f"
Average bars between orders: {order_gaps.mean():.1f}")
        print(f"Median bars between orders: {order_gaps.median():.1f}")
        
        # Check for immediate re-entry after exit
        immediate_reentries = (order_gaps <= 1).sum()
        print(f"
Immediate re-entries (≤1 bar): {immediate_reentries}")
        
        # Visualize order frequency
        plt.figure(figsize=(10, 5))
        plt.hist(order_gaps, bins=50, edgecolor='black', alpha=0.7)
        plt.xlabel('Bars Between Orders')
        plt.ylabel('Frequency')
        plt.title('Distribution of Time Between Orders')
        plt.axvline(order_gaps.mean(), color='red', linestyle='--', label=f'Mean: {order_gaps.mean():.1f}')
        plt.axvline(order_gaps.median(), color='green', linestyle='--', label=f'Median: {order_gaps.median():.1f}')
        plt.legend()
        plt.show()

## 9. Risk Management Effectiveness

In [None]:
# Analyze effectiveness of risk management
if 'position_close' in traces and 'realized_pnl' in traces['position_close'].columns:
    pos_close = traces['position_close']
    
    print("=== Risk Management Effectiveness ===")
    
    # Calculate metrics
    total_trades = len(pos_close)
    profitable_trades = (pos_close['realized_pnl'] > 0).sum() if 'realized_pnl' in pos_close.columns else 0
    
    if total_trades > 0:
        win_rate = profitable_trades / total_trades * 100
        avg_win = pos_close[pos_close['realized_pnl'] > 0]['realized_pnl'].mean() if profitable_trades > 0 else 0
        avg_loss = pos_close[pos_close['realized_pnl'] < 0]['realized_pnl'].mean() if (total_trades - profitable_trades) > 0 else 0
        
        print(f"Total trades: {total_trades}")
        print(f"Win rate: {win_rate:.1f}%")
        print(f"Average win: ${avg_win:.2f}")
        print(f"Average loss: ${avg_loss:.2f}")
        
        if avg_loss != 0:
            profit_factor = abs(avg_win / avg_loss)
            print(f"Profit factor: {profit_factor:.2f}")
        
        # Analyze by exit type
        if 'exit_type' in pos_close.columns:
            print("
Win rate by exit type:")
            for exit_type, group in pos_close.groupby('exit_type'):
                wins = (group['realized_pnl'] > 0).sum()
                total = len(group)
                win_pct = wins / total * 100 if total > 0 else 0
                print(f"  {exit_type}: {win_pct:.1f}% ({wins}/{total})")

## 10. Summary & Recommendations

In [None]:
print("=== SUMMARY ===")
print(f"
Data Quality:")
for key, df in traces.items():
    print(f"  {key}: {len(df)} records")

print(f"
Key Findings:")
if 'orders' in traces and 'fills' in traces:
    print(f"  - Order fill rate: {len(traces['fills'])/len(traces['orders'])*100:.1f}%")

if 'position_close' in traces and 'exit_type' in traces['position_close'].columns:
    risk_exits = traces['position_close']['exit_type'].isin(['stop_loss', 'take_profit', 'trailing_stop']).sum()
    print(f"  - Risk management exits: {risk_exits} ({risk_exits/len(traces['position_close'])*100:.1f}%)")

print("
=== RECOMMENDATIONS ===")
print("1. Check for immediate re-entry after risk exits")
print("2. Analyze signal persistence after stop-loss exits")
print("3. Consider implementing a 'cooldown' period after risk exits")
print("4. Review risk parameters if too many stop-loss exits")