# Global Signal Analysis

This notebook analyzes signals directly from the global traces store.
No run directory is required — just set the strategy type, symbol, and timeframe filters in the parameters cell.

In [None]:
# Parameters - for papermill or manual use
# Keep every entry a plain literal assignment so the cell can be overridden
# by papermill or edited by hand.
#
# Query filters: a falsy value (None / '') disables the corresponding filter.
strategy_type = None  # e.g., 'bollinger_bands', 'ma_crossover', None for all
symbol = 'SPY'  # Filter by symbol
timeframe = '5m'  # Filter by timeframe
# Passed to load_strategy_index(); its parent directory is used as the project root.
traces_dir = '/Users/daws/ADMF-PC/traces'  # Global traces directory

# Analysis parameters
# Below this many matching strategies the filter cell prints a warning
# together with the available (strategy_type, symbol, timeframe) combinations.
min_strategies_to_analyze = 5
# NOTE(review): the next four values are not referenced by the cells visible
# in this notebook export - confirm whether they are still needed.
sharpe_threshold = 1.0
correlation_threshold = 0.7
top_n_strategies = 10
ensemble_size = 5
# When calculate_all_performance is False and more than performance_limit
# strategies match, a random sample of performance_limit strategies is analyzed.
calculate_all_performance = True
performance_limit = 100

# Enhanced analysis parameters
# Cost in basis points, subtracted once from each trade's return (bps / 10000).
execution_cost_bps = 1.0
# NOTE(review): the stop-loss / profit-target / timezone settings below are not
# referenced by the cells visible here; the unit of the level values
# (percent vs. fraction) cannot be told from this notebook - confirm.
analyze_stop_losses = True
stop_loss_levels = [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.75, 1.0]
profit_target_levels = [0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5, 0.75, 1.0]
market_timezone = 'America/New_York'

## Setup

In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Plot styling shared by every figure in this notebook
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 10,
})

# Locations inside the global traces directory
traces_path = Path(traces_dir)
store_path = traces_path.joinpath('store')
strategy_index_path = traces_path.joinpath('strategy_index.parquet')

# Echo the active query; a falsy filter value is reported as 'ALL'
print(f"📁 Global traces directory: {traces_path}")
print(f"📁 Store directory: {store_path}")
print(f"📊 Analyzing strategies: {strategy_type or 'ALL'}")
print(f"📈 Symbol: {symbol or 'ALL'}")
print(f"⏱️ Timeframe: {timeframe or 'ALL'}")

In [None]:
# Load analytics modules
# Makes the project's `src` package importable from this notebook, then pulls
# in the data-loading helpers used by the cells below.
import sys
from pathlib import Path

# Find project root and add to path
# The project root is taken to be the parent directory of `traces_dir`.
project_root = Path(traces_dir).parent
if str(project_root) not in sys.path:
    # Insert at the front so this checkout wins over other `src` packages on the path.
    sys.path.insert(0, str(project_root))

# Import data loading functions
# This import must stay below the sys.path update above.
# NOTE(review): only load_strategy_index is called in the cells visible here;
# load_global_traces and load_market_data are imported but not called in them.
from src.analytics.modules.core.data_loading import (
    load_global_traces,
    load_strategy_index,
    load_market_data
)

print("✅ Analytics modules loaded")

## Load Strategy Index

In [None]:
# Read the global strategy index and print a short overview of its contents.
# A missing index is reported and replaced by an empty frame so that the
# following cells can still run.
try:
    strategy_index = load_strategy_index(traces_dir)
    n_indexed = len(strategy_index)
    print(f"✅ Loaded {n_indexed} strategies from global index")

    # Number of indexed strategies per strategy type
    print("\nStrategies by type:")
    type_counts = strategy_index['strategy_type'].value_counts()
    for type_name, n_of_type in zip(type_counts.index, type_counts.tolist()):
        print(f"  {type_name}: {n_of_type}")

    # Preview of the first ten column names
    leading_columns = list(strategy_index.columns[:10])
    print(f"\nColumns available: {leading_columns}...")

except FileNotFoundError:
    print("❌ No strategy index found. Run signal generation first.")
    strategy_index = pd.DataFrame()

In [None]:
# Narrow the strategy index down to the rows selected by the notebook parameters
if strategy_index.empty:
    filtered_strategies = pd.DataFrame()
    print("⚠️ No strategies to filter")
else:
    filtered_strategies = strategy_index.copy()

    # Every truthy parameter becomes an equality filter on its index column;
    # the filters are applied (and reported) in this order.
    requested_filters = (
        ('strategy_type', strategy_type),
        ('symbol', symbol),
        ('timeframe', timeframe),
    )
    for column_name, wanted in requested_filters:
        if not wanted:
            continue
        keep_mask = filtered_strategies[column_name] == wanted
        filtered_strategies = filtered_strategies[keep_mask]
        print(f"Filtered to {wanted}: {len(filtered_strategies)} strategies")

    n_matching = len(filtered_strategies)
    print(f"\n📊 Total matching strategies: {n_matching}")

    # Too few matches: show which combinations the index actually contains
    if n_matching < min_strategies_to_analyze:
        print(f"\n⚠️ Only {n_matching} strategies found (minimum {min_strategies_to_analyze} recommended)")
        print("\nAvailable combinations:")
        combination_sizes = strategy_index.groupby(['strategy_type', 'symbol', 'timeframe']).size()
        print(combination_sizes.to_frame('count'))

## Load Market Data

In [None]:
# Load market data from global data directory
# Result: `market_data` is either a frame with parsed, time-sorted timestamps
# or None. It is only assigned once loading has fully succeeded, so a failure
# half-way (e.g. a missing 'timestamp' column) can no longer leave a partially
# prepared frame behind that later cells would mistake for usable data.
market_data = None

if symbol and timeframe:
    try:
        # Candidate files, most specific first: project data directory, then
        # the fixed fallback location.
        # NOTE: the '{symbol}.csv' fallbacks do not encode the timeframe in
        # their name, so the bar size of such a file is not verified here.
        candidate_paths = [
            project_root / f'data/{symbol}_{timeframe}.csv',
            project_root / f'data/{symbol}.csv',
            Path(f'/Users/daws/ADMF-PC/data/{symbol}_{timeframe}.csv'),
            Path(f'/Users/daws/ADMF-PC/data/{symbol}.csv')
        ]
        # Drop duplicates while keeping order: with the default traces_dir the
        # fallback paths are identical to the project-root paths.
        data_paths = list(dict.fromkeys(candidate_paths))

        data_path = next((path for path in data_paths if path.exists()), None)

        if data_path is None:
            print(f"❌ Could not find market data for {symbol}_{timeframe}")
            print("Tried paths:")
            for path in data_paths:
                print(f"  - {path}")
        else:
            # Build the frame in a local first; publish it only when complete.
            loaded = pd.read_csv(data_path)
            loaded['timestamp'] = pd.to_datetime(loaded['timestamp'])
            loaded = loaded.sort_values('timestamp')
            market_data = loaded

            print(f"✅ Loaded market data from: {data_path}")
            print(f"   Date range: {market_data['timestamp'].min()} to {market_data['timestamp'].max()}")
            print(f"   Total bars: {len(market_data):,}")

    except Exception as e:
        # Best-effort cell: report the problem and continue without market data.
        print(f"Error loading market data: {e}")
else:
    print("⚠️ Skipping market data load (need both symbol and timeframe)")

## Performance Calculation Functions

In [None]:
# Performance calculation functions (defined inline here; carried over from the original notebook)
def extract_trades(strategy_hash, trace_path, market_data, execution_cost_bps=1.0):
    """Extract closed trades from a signal trace, net of execution costs.

    The sparse signal trace is left-joined onto the market bars and
    forward-filled into a per-bar position (0 before the first signal).
    Every bar on which the position changes closes the open trade, if any,
    and opens a new trade when the new position is non-zero. A direct
    reversal (e.g. 1 -> -1) therefore closes the old trade and opens the
    opposite one on the same bar, and a signal on the very first bar opens a
    trade as well.

    A position that is still open after the last bar is not reported.

    Args:
        strategy_hash: Identifier copied onto every trade row.
        trace_path: Path of the parquet signal trace, or an already loaded
            DataFrame (it is copied, not modified). Required columns:
            ``ts``, ``val`` and ``px``.
        market_data: DataFrame with a ``timestamp`` column and a ``close``
            column; ``close`` is the fill price whenever ``px`` is missing.
        execution_cost_bps: Cost in basis points, subtracted once from the
            return of every trade.

    Returns:
        DataFrame with one row per closed trade and the columns
        ``strategy_hash``, ``entry_time``, ``exit_time``, ``entry_price``,
        ``exit_price``, ``direction``, ``raw_return``, ``execution_cost``,
        ``net_return``, ``duration_minutes``, ``entry_idx`` and ``exit_idx``.
        An empty DataFrame is returned when no trade was closed, and also
        (after printing the error) when anything goes wrong.
    """
    try:
        # Load signals from global store (or use the frame that was handed in)
        if isinstance(trace_path, pd.DataFrame):
            signals = trace_path.copy()
        else:
            signals = pd.read_parquet(trace_path)
        signals['ts'] = pd.to_datetime(signals['ts'])

        # Align the signals with the market bars
        df = market_data.merge(
            signals[['ts', 'val', 'px']],
            left_on='timestamp',
            right_on='ts',
            how='left',
        )

        # Forward-filled signal value is the position held on each bar
        position = df['val'].ffill().fillna(0)
        df['position'] = position

        # Compare against the previous bar, treating "before the first bar"
        # as flat so that a signal on the first bar counts as a change.
        changed = position != position.shift(fill_value=0)

        cost_adjustment = execution_cost_bps / 10000
        trades = []
        current_trade = None

        # Only bars with a position change can open or close a trade
        for idx, row in df[changed].iterrows():
            fill_price = row['px'] if pd.notna(row['px']) else row['close']

            if current_trade is not None:
                # Any change of position ends the open trade
                entry_price = current_trade['entry_price']
                price_move = (fill_price - entry_price) / entry_price
                raw_return = price_move if current_trade['direction'] > 0 else -price_move

                trades.append({
                    'strategy_hash': strategy_hash,
                    'entry_time': current_trade['entry_time'],
                    'exit_time': row['timestamp'],
                    'entry_price': entry_price,
                    'exit_price': fill_price,
                    'direction': current_trade['direction'],
                    'raw_return': raw_return,
                    'execution_cost': cost_adjustment,
                    'net_return': raw_return - cost_adjustment,
                    'duration_minutes': (row['timestamp'] - current_trade['entry_time']).total_seconds() / 60,
                    'entry_idx': current_trade['entry_idx'],
                    'exit_idx': idx,
                })
                current_trade = None

            if row['position'] != 0:
                # Fresh entry, or the second leg of a reversal
                current_trade = {
                    'entry_time': row['timestamp'],
                    'entry_price': fill_price,
                    'direction': row['position'],
                    'entry_idx': idx,
                }

        return pd.DataFrame(trades)
    except Exception as e:
        # Best-effort: one broken trace must not abort the whole analysis run
        print(f"Error extracting trades for {str(strategy_hash)[:8]}: {e}")
        return pd.DataFrame()

def calculate_performance(strategy_hash, trace_path, market_data, execution_cost_bps=1.0):
    """Calculate trade-based performance metrics for one strategy.

    Trades are obtained from ``extract_trades`` and compounded in entry-time
    order into an equity curve that starts at 1.0.

    Args:
        strategy_hash: Strategy identifier, forwarded to ``extract_trades``.
        trace_path: Signal trace location, forwarded to ``extract_trades``.
        market_data: Market bars, forwarded to ``extract_trades``.
        execution_cost_bps: Per-trade cost in basis points, forwarded to
            ``extract_trades``.

    Returns:
        Dict with the keys ``total_return``, ``sharpe_ratio``,
        ``max_drawdown``, ``num_trades``, ``win_rate``,
        ``avg_return_per_trade``, ``profit_factor`` and
        ``total_execution_cost``. All values are 0 when there are no trades.
        ``None`` is returned (after printing the error) when the calculation
        fails.
    """
    try:
        # Extract actual trades
        trades = extract_trades(strategy_hash, trace_path, market_data, execution_cost_bps)

        if len(trades) == 0:
            return {
                'total_return': 0,
                'sharpe_ratio': 0,
                'max_drawdown': 0,
                'num_trades': 0,
                'win_rate': 0,
                'avg_return_per_trade': 0,
                'profit_factor': 0,
                'total_execution_cost': 0
            }

        # Compound the per-trade returns into an equity curve
        trades = trades.sort_values('entry_time').reset_index(drop=True)
        net_returns = trades['net_return']
        equity = (1 + net_returns).cumprod()
        trades['cum_return'] = equity
        total_return = equity.iloc[-1] - 1

        # Sharpe ratio of per-trade returns; a single trade has an undefined
        # (NaN) standard deviation and therefore falls into the 0 branch.
        return_std = net_returns.std()
        if return_std > 0:
            # Annualize based on average trades per day
            days_in_data = (trades['exit_time'].max() - trades['entry_time'].min()).days
            if days_in_data > 0:
                trades_per_day = len(trades) / days_in_data
                annualization_factor = np.sqrt(252 * trades_per_day)
            else:
                annualization_factor = np.sqrt(252)

            sharpe_ratio = net_returns.mean() / return_std * annualization_factor
        else:
            sharpe_ratio = 0

        # Max drawdown. The running peak is floored at the starting equity of
        # 1.0; otherwise a loss on the first trade(s) would set the first
        # "peak" below 1.0 and the initial decline would not be counted.
        running_peak = equity.cummax().clip(lower=1.0)
        max_dd = (equity / running_peak - 1).min()

        # Win rate and profit factor (break-even trades count as losses)
        is_win = net_returns > 0
        winning_trades = trades[is_win]
        losing_trades = trades[~is_win]

        win_rate = len(winning_trades) / len(trades)

        gross_loss = losing_trades['net_return'].sum()
        if len(losing_trades) > 0 and gross_loss != 0:
            profit_factor = winning_trades['net_return'].sum() / abs(gross_loss)
        else:
            # No losses to divide by: 999.99 is a stand-in for "unbounded"
            profit_factor = 999.99 if len(winning_trades) > 0 else 0

        return {
            'total_return': total_return,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_dd,
            'num_trades': len(trades),
            'win_rate': win_rate,
            'avg_return_per_trade': net_returns.mean(),
            'profit_factor': profit_factor,
            'total_execution_cost': trades['execution_cost'].sum()
        }
    except Exception as e:
        # Best-effort: report and let the caller skip this strategy
        print(f"Error calculating performance for {strategy_hash}: {e}")
        return None

## Calculate Performance

In [None]:
# Run the performance calculation for every selected strategy and collect the
# results (index row merged with its metrics) into `performance_df`.
performance_results = []

if filtered_strategies.empty:
    performance_df = pd.DataFrame()
    print("⚠️ No strategies to analyze")
elif market_data is None:
    performance_df = pd.DataFrame()
    print("⚠️ No market data available for performance calculation")
else:
    # By default everything that matched the filters is analyzed
    strategies_to_analyze = filtered_strategies
    n_available = len(filtered_strategies)

    if not (calculate_all_performance or n_available <= performance_limit):
        print(f"Note: Large set detected ({n_available} strategies)")
        print(f"Limiting analysis to {performance_limit} strategies")

        # Fixed seed keeps the sampled subset reproducible between runs
        strategies_to_analyze = filtered_strategies.sample(n=performance_limit, random_state=42)

    n_total = len(strategies_to_analyze)
    print(f"\nCalculating performance for {n_total} strategies...")
    print(f"Execution cost: {execution_cost_bps} basis points round-trip")

    for position, (_, strategy_row) in enumerate(strategies_to_analyze.iterrows()):
        # Progress line every tenth strategy
        if position % 10 == 0:
            print(f"  Progress: {position}/{n_total} ({position/n_total*100:.1f}%)")

        # Trace location comes from the index; strategies without a trace
        # file on disk are skipped
        trace_path = Path(strategy_row['trace_path'])
        if not trace_path.exists():
            continue

        perf = calculate_performance(strategy_row['strategy_hash'], trace_path, market_data, execution_cost_bps)
        if not perf:
            continue

        # Index columns first, metrics second
        performance_results.append({**strategy_row.to_dict(), **perf})

    print(f"  Progress: {n_total}/{n_total} (100.0%)")

    performance_df = pd.DataFrame(performance_results)
    print(f"\n✅ Calculated performance for {len(performance_df)} strategies")

## Display Results

In [None]:
# Print a ranking table of the best strategies plus aggregate statistics
if len(performance_df) > 0:
    rule_width = 120
    print("\n🏆 Top 20 Strategies by Sharpe Ratio")
    print("=" * rule_width)

    # Best 20 rows by Sharpe ratio
    top_strategies = performance_df.nlargest(20, 'sharpe_ratio')

    # Table header: left-aligned labels with fixed widths, separated by one space
    header_cells = (
        ('Rank', 5), ('Strategy Type', 20), ('Hash', 12), ('Sharpe', 10),
        ('Return', 12), ('Win Rate', 10), ('Avg/Trade', 12), ('# Trades', 10),
    )
    print(" ".join(f"{label:<{width}}" for label, width in header_cells))
    print("-" * rule_width)

    # One line per strategy; ranks start at 1
    for rank, (_, entry) in enumerate(top_strategies.iterrows(), 1):
        row_cells = [
            f"{rank:<5}",
            f"{entry['strategy_type']:<20}",
            f"{entry['strategy_hash'][:10]:<12}",
            f"{entry['sharpe_ratio']:>9.2f}",
            f"{entry['total_return']*100:>11.1f}%",
            f"{entry['win_rate']*100:>9.1f}%",
            f"{entry['avg_return_per_trade']*100:>11.3f}%",
            f"{entry['num_trades']:>9}",
        ]
        print(" ".join(row_cells))

    # Aggregates over every analyzed strategy
    print("\n📊 Overall Performance Summary:")
    print(f"Total strategies analyzed: {len(performance_df)}")
    print(f"Average win rate: {performance_df['win_rate'].mean()*100:.1f}%")
    print(f"Average return per trade: {performance_df['avg_return_per_trade'].mean()*100:.3f}%")
    has_positive_sharpe = performance_df['sharpe_ratio'] > 0
    print(f"Strategies with positive Sharpe: {has_positive_sharpe.sum()} ({has_positive_sharpe.mean()*100:.1f}%)")
    wins_majority = performance_df['win_rate'] > 0.5
    print(f"Strategies with win rate > 50%: {wins_majority.sum()} ({wins_majority.mean()*100:.1f}%)")

    # Columns that are neither index metadata nor computed metrics are
    # treated as strategy parameters
    non_parameter_columns = {
        'strategy_hash', 'strategy_type', 'symbol', 'timeframe',
        'constraints', 'trace_path', 'first_seen', 'full_config',
        'total_return', 'sharpe_ratio', 'max_drawdown', 'num_trades',
        'win_rate', 'avg_return_per_trade', 'profit_factor', 'total_execution_cost',
    }
    param_cols = [name for name in performance_df.columns if name not in non_parameter_columns]

    if param_cols:
        print("\n📈 Parameter Ranges in Top 10:")
        top_10 = performance_df.nlargest(10, 'sharpe_ratio')
        for name in param_cols:
            values = top_10[name]
            # Only numeric parameters with at least one value have a range
            if not pd.api.types.is_numeric_dtype(values):
                continue
            if not values.notna().any():
                continue
            print(f"  {name}: {values.min():.2f} - {values.max():.2f}")

## Visualizations

In [None]:
# 2x2 overview figure of the performance metrics, followed by a per-type
# summary table when no single strategy type was requested
if len(performance_df) > 0:
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    def _draw_histogram(panel, column, n_bins, reference, title, x_label):
        # Histogram of one metric with a dashed red reference line
        performance_df[column].hist(bins=n_bins, ax=panel)
        panel.axvline(reference, color='red', linestyle='--', alpha=0.5)
        panel.set_title(title)
        panel.set_xlabel(x_label)
        panel.set_ylabel('Count')

    # Top left: Sharpe ratio distribution (reference at 0)
    _draw_histogram(axes[0, 0], 'sharpe_ratio', 30, 0,
                    'Sharpe Ratio Distribution', 'Sharpe Ratio')

    # Top right: win rate distribution (reference at 50%)
    _draw_histogram(axes[0, 1], 'win_rate', 30, 0.5,
                    'Win Rate Distribution', 'Win Rate')

    # Bottom left: max drawdown (in percent) against Sharpe ratio
    scatter_panel = axes[1, 0]
    drawdown_pct = performance_df['max_drawdown']*100
    scatter_panel.scatter(drawdown_pct, performance_df['sharpe_ratio'], alpha=0.6)
    scatter_panel.set_xlabel('Max Drawdown (%)')
    scatter_panel.set_ylabel('Sharpe Ratio')
    scatter_panel.set_title('Risk-Return Profile')
    scatter_panel.grid(True, alpha=0.3)

    # Bottom right: average return per trade, x-axis limited to +/- 0.02
    _draw_histogram(axes[1, 1], 'avg_return_per_trade', 50, 0,
                    'Average Return per Trade Distribution', 'Average Return per Trade')
    axes[1, 1].set_xlim(-0.02, 0.02)  # Focus on realistic range

    plt.tight_layout()
    plt.show()

    # Per-type breakdown, only when strategy_type is None
    if strategy_type is None:
        print("\n📊 Performance by Strategy Type:")
        aggregations = {
            'sharpe_ratio': ['mean', 'std', 'max'],
            'win_rate': 'mean',
            'avg_return_per_trade': 'mean',
            'strategy_hash': 'count'
        }
        type_summary = performance_df.groupby('strategy_type').agg(aggregations).round(3)

        # Flatten the two-level column index into readable labels
        type_summary.columns = ['Avg Sharpe', 'Std Sharpe', 'Max Sharpe', 'Avg Win Rate', 'Avg Return/Trade', 'Count']
        ranked_types = type_summary.sort_values('Avg Sharpe', ascending=False)
        print(ranked_types)

## Export Results

In [None]:
# Write the full results, the top-20 table and a JSON summary to disk
if len(performance_df) > 0:
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Output directory, relative to the current working directory
    exports_dir = Path('strategy_analysis_exports')
    exports_dir.mkdir(exist_ok=True)

    # File name prefix: the active filters followed by the timestamp
    active_filters = [value for value in (strategy_type, symbol, timeframe) if value]
    filename_base = '_'.join(active_filters + [timestamp])

    # Full results
    performance_file = exports_dir / f'{filename_base}_performance.csv'
    performance_df.to_csv(performance_file, index=False)
    print(f"\n✅ Exported performance results to: {performance_file}")

    # Top 20 by Sharpe ratio, reduced to the headline columns
    top_20 = performance_df.nlargest(20, 'sharpe_ratio')
    headline_columns = ['strategy_hash', 'strategy_type', 'sharpe_ratio', 'total_return',
                        'win_rate', 'avg_return_per_trade', 'num_trades']
    top20_file = exports_dir / f'{filename_base}_top20.csv'
    top_20[headline_columns].to_csv(top20_file, index=False)
    print(f"✅ Exported top 20 strategies to: {top20_file}")

    # Summary report: the query, aggregate results and the best strategy
    best_row = top_20.iloc[0]
    sharpe_values = performance_df['sharpe_ratio']
    query_section = {
        'strategy_type': strategy_type,
        'symbol': symbol,
        'timeframe': timeframe,
        'timestamp': timestamp
    }
    results_section = {
        'total_strategies': len(performance_df),
        'avg_sharpe': float(sharpe_values.mean()),
        'best_sharpe': float(sharpe_values.max()),
        'avg_win_rate': float(performance_df['win_rate'].mean()),
        'strategies_profitable': int((performance_df['avg_return_per_trade'] > 0).sum()),
        'execution_cost_bps': execution_cost_bps
    }
    best_section = {
        'hash': best_row['strategy_hash'],
        'type': best_row['strategy_type'],
        'sharpe': float(best_row['sharpe_ratio']),
        'return': float(best_row['total_return']),
        'win_rate': float(best_row['win_rate'])
    }
    summary = {
        'query': query_section,
        'results': results_section,
        'best_strategy': best_section
    }

    summary_file = exports_dir / f'{filename_base}_summary.json'
    with open(summary_file, 'w') as f:
        json.dump(summary, f, indent=2)

    print(f"✅ Exported summary to: {summary_file}")
else:
    print("\n⚠️ No results to export")

## Summary

In [None]:
# Closing report: headline numbers, or troubleshooting hints when nothing
# was analyzed
banner = "=" * 80
print(banner)
print("ANALYSIS COMPLETE")
print(banner)

n_analyzed = len(performance_df)
if n_analyzed > 0:
    print(f"\n✅ Analyzed {n_analyzed} strategies")
    print(f"✅ Best Sharpe ratio: {performance_df['sharpe_ratio'].max():.2f}")
    print(f"✅ Average win rate: {performance_df['win_rate'].mean()*100:.1f}%")

    # "Profitable" means a positive average net return per trade
    profitable = performance_df[performance_df['avg_return_per_trade'] > 0]
    n_profitable = len(profitable)
    print(f"\n💰 Profitable strategies: {n_profitable} / {n_analyzed} ({n_profitable/n_analyzed*100:.1f}%)")

    if n_profitable > 0:
        print(f"\nTop profitable strategy:")
        # Highest Sharpe ratio among the profitable strategies
        best = profitable.nlargest(1, 'sharpe_ratio').iloc[0]
        print(f"  Type: {best['strategy_type']}")
        print(f"  Hash: {best['strategy_hash']}")
        print(f"  Sharpe: {best['sharpe_ratio']:.2f}")
        print(f"  Win Rate: {best['win_rate']*100:.1f}%")
        print(f"  Avg Return/Trade: {best['avg_return_per_trade']*100:.3f}%")
else:
    troubleshooting_lines = (
        "\n⚠️ No strategies analyzed",
        "\nTry:",
        "1. Running signal generation first",
        "2. Adjusting filter parameters",
        "3. Checking available strategy types in the global index",
    )
    for line in troubleshooting_lines:
        print(line)

print("\n" + banner)