# Global Signal Research Notebook

This notebook provides tools for analyzing signals across the centralized traces directory.

## Features:
- Load and query the strategy index
- Analyze signals by strategy type, parameters, and date range
- Compare performance across different configurations
- Aggregate metrics across multiple runs

In [None]:
# Parameters (set by papermill or manually)
# NOTE(review): the default traces directory below is an absolute, machine-specific
# path; override it when running this notebook on another machine.
# - global_traces_dir: directory that holds 'strategy_index.parquet' and the trace files.
# - symbols: each symbol is matched against the 'symbol' column of the signal traces
#   and used to locate a market-data CSV.
# - strategy_types: values matched against the index's 'strategy_type' column.
# - date_start / date_end: keep strategies whose 'end_date' >= date_start and whose
#   'start_date' <= date_end.
# - top_n: how many strategies (ranked by 'total_signals') have their signals loaded.
global_traces_dir = '/Users/daws/ADMF-PC/traces'  # Global traces directory
symbols = ['SPY']  # Symbols to analyze
strategy_types = None  # Strategy types to analyze (None = all)
date_start = None  # Start date for analysis
date_end = None  # End date for analysis
top_n = 20  # Number of top strategies to show

In [None]:
# Standard imports
import pandas as pd
import numpy as np
from pathlib import Path
import json
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')  # silence every warning category for this session

# Configure display: show all columns, cap printed rows at 100, four-decimal floats
display_options = {
    'display.max_columns': None,
    'display.max_rows': 100,
    'display.float_format': '{:.4f}'.format,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

# Set style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Banner echoing the run configuration
print(
    "📊 Global Signal Research Notebook",
    f"📁 Traces directory: {global_traces_dir}",
    f"🔍 Analyzing symbols: {symbols}",
    sep='\n',
)

## 1. Load Strategy Index

The strategy index contains metadata about all computed strategies.

In [None]:
# Load strategy index
traces_path = Path(global_traces_dir)
index_path = traces_path / 'strategy_index.parquet'

if index_path.exists():
    strategy_index = pd.read_parquet(index_path)
    print(f"✅ Loaded {len(strategy_index)} strategies from index")
    print(f"\nStrategy types: {strategy_index['strategy_type'].unique()}")
    print(f"\nDate range: {strategy_index['start_date'].min()} to {strategy_index['end_date'].max()}")
else:
    print("❌ No strategy index found. Run signal generation first.")
    # Fall back to an empty frame that still carries every column the later cells
    # read, so they degrade to "nothing found" instead of raising KeyError.
    strategy_index = pd.DataFrame(columns=[
        'strategy_hash', 'strategy_name', 'strategy_type', 'parameters',
        'trace_path', 'total_signals', 'start_date', 'end_date',
    ]).astype({'total_signals': 'int64'})  # nlargest() rejects object-dtype columns

In [None]:
# Display strategy index summary
if not strategy_index.empty:
    print("\n📋 Strategy Index Summary:")
    # One row per strategy type: number of strategies, summed signal count,
    # earliest start date and latest end date.
    summary_by_type = strategy_index.groupby('strategy_type').agg(
        count=('strategy_hash', 'count'),
        total_signals=('total_signals', 'sum'),
        start_date=('start_date', 'min'),
        end_date=('end_date', 'max'),
    )
    display(summary_by_type)

## 2. Query Strategies

Filter strategies by various criteria.

In [None]:
# Filter strategies
filtered_strategies = strategy_index.copy()

# A single strategy type may be supplied as a bare string; Series.isin() needs a
# list-like, so wrap it.
wanted_types = [strategy_types] if isinstance(strategy_types, str) else strategy_types

# The fallback index created when no index file exists may have no columns at all,
# so only apply column-based filters when there are rows to filter.
if not filtered_strategies.empty:
    # Filter by strategy type
    if wanted_types:
        filtered_strategies = filtered_strategies[filtered_strategies['strategy_type'].isin(wanted_types)]
        print(f"Filtered to strategy types: {strategy_types}")

    # Filter by date range: keep strategies whose [start_date, end_date] span
    # overlaps the requested window.
    if date_start:
        filtered_strategies = filtered_strategies[filtered_strategies['end_date'] >= date_start]
        print(f"Filtered to strategies ending after: {date_start}")

    if date_end:
        filtered_strategies = filtered_strategies[filtered_strategies['start_date'] <= date_end]
        print(f"Filtered to strategies starting before: {date_end}")

print(f"\n✅ Found {len(filtered_strategies)} strategies matching criteria")

## 3. Load and Analyze Signals

Load actual signal data for selected strategies.

In [None]:
def load_strategy_signals(strategy_row):
    """Read the signal trace that belongs to one strategy-index row.

    Args:
        strategy_row: Mapping-like row (e.g. a ``pd.Series`` taken from the
            strategy index) whose ``'trace_path'`` entry is joined onto the
            notebook-level ``traces_path`` directory.

    Returns:
        The parquet file's contents as a DataFrame, or an empty DataFrame
        when the trace file does not exist.
    """
    signal_file = traces_path / strategy_row['trace_path']
    if not signal_file.exists():
        return pd.DataFrame()
    return pd.read_parquet(signal_file)

# Load signals for top strategies by signal count.
# When no index file was found the index may lack a 'total_signals' column, in which
# case nlargest() would raise KeyError; fall back to an empty selection instead.
if 'total_signals' in filtered_strategies.columns:
    top_strategies = filtered_strategies.nlargest(top_n, 'total_signals')
else:
    top_strategies = filtered_strategies.head(0)

all_signals = []
for _, row in top_strategies.iterrows():
    signals = load_strategy_signals(row)
    if signals.empty:
        continue
    # Add strategy metadata (assign returns a new frame, leaving the loaded one untouched)
    all_signals.append(signals.assign(
        strategy_type=row['strategy_type'],
        strategy_name=row['strategy_name'],
        strategy_hash=row['strategy_hash'],
    ))

if all_signals:
    combined_signals = pd.concat(all_signals, ignore_index=True)
    print(f"✅ Loaded {len(combined_signals)} signals from {len(all_signals)} strategies")
else:
    combined_signals = pd.DataFrame()
    print("❌ No signals found")

## 4. Signal Analysis

Analyze signal patterns and distributions.

In [None]:
if not combined_signals.empty:
    # Two panels: signal mix per strategy type, and signal counts per calendar day
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # Signal counts by strategy type.
    # Label each series from its actual signal value: a positional label list would
    # mislabel the series whenever one of -1 / 0 / 1 is absent from the data.
    signal_counts = (
        combined_signals.groupby(['strategy_type', 'signal']).size().unstack(fill_value=0)
        .rename(columns={-1: 'Short (-1)', 0: 'Neutral (0)', 1: 'Long (1)'})
    )
    signal_counts.plot(kind='bar', ax=axes[0], stacked=True)
    axes[0].set_title('Signal Distribution by Strategy Type')
    axes[0].set_xlabel('Strategy Type')
    axes[0].set_ylabel('Signal Count')
    axes[0].legend()  # rebuild the legend from the renamed series, without a title

    # Signal frequency over time
    combined_signals['date'] = pd.to_datetime(combined_signals['timestamp']).dt.date
    daily_signals = (
        combined_signals.groupby(['date', 'signal']).size().unstack(fill_value=0)
        .rename(columns={-1: 'Short', 0: 'Neutral', 1: 'Long'})
    )
    daily_signals.plot(ax=axes[1], alpha=0.7)
    axes[1].set_title('Daily Signal Frequency')
    axes[1].set_xlabel('Date')
    axes[1].set_ylabel('Signal Count')
    axes[1].legend()

    plt.tight_layout()
    plt.show()

## 5. Performance Metrics

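Summarize signal activity per strategy and symbol (requires market data). Note: only signal counts and the long/short signal ratio are computed here; price-based returns are not yet calculated.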

In [None]:
# Load market data for performance calculation
def load_market_data(symbol, start_date=None, end_date=None, data_dirs=('data', '../data')):
    """Load market data for a symbol from the first matching CSV file.

    For each directory in ``data_dirs`` (in order) the files ``<symbol>.csv``
    and ``<symbol>_5m.csv`` are tried; the first one that exists is read.

    Args:
        symbol: Symbol used to build the candidate file names.
        start_date: Optional lower bound (inclusive) applied to the index.
        end_date: Optional upper bound (inclusive) applied to the index.
        data_dirs: Directories to search. Relative entries are resolved
            against the current working directory. The default matches the
            locations this notebook has always searched.

    Returns:
        A DataFrame indexed by the parsed ``timestamp`` column, or an empty
        DataFrame when no candidate file exists.
    """
    # Try different data locations, preserving the directory-then-filename order
    candidate_paths = [
        Path(data_dir) / file_name
        for data_dir in data_dirs
        for file_name in (f'{symbol}.csv', f'{symbol}_5m.csv')
    ]

    for data_path in candidate_paths:
        if not data_path.exists():
            continue
        df = pd.read_csv(data_path, parse_dates=['timestamp'], index_col='timestamp')
        if start_date:
            df = df[df.index >= start_date]
        if end_date:
            df = df[df.index <= end_date]
        return df
    return pd.DataFrame()

# Calculate simple signal-count metrics for each strategy and symbol
strategy_performance = []

for symbol in symbols:
    market_data = load_market_data(symbol)
    if market_data.empty:
        print(f"⚠️ No market data found for {symbol}")
        continue

    print(f"\n📈 Calculating performance for {symbol}...")

    for _, strategy in top_strategies.iterrows():
        signals = load_strategy_signals(strategy)
        if signals.empty:
            continue

        # Filter signals for this symbol
        symbol_signals = signals[signals['symbol'] == symbol].copy()
        if symbol_signals.empty:
            continue

        # Merge with market data: attach the most recent close at or before each signal
        symbol_signals['timestamp'] = pd.to_datetime(symbol_signals['timestamp'])
        merged = pd.merge_asof(
            symbol_signals.sort_values('timestamp'),
            market_data[['close']].sort_index(),
            left_on='timestamp',
            right_index=True,
            direction='backward'
        )

        if len(merged) > 0:
            # Calculate simple metrics
            total_signals = len(merged)
            long_signals = (merged['signal'] == 1).sum()
            short_signals = (merged['signal'] == -1).sum()

            # Long/short ratio: inf when there are longs but no shorts, and NaN
            # (undefined) when there are neither, rather than a misleading inf.
            if short_signals > 0:
                signal_ratio = long_signals / short_signals
            elif long_signals > 0:
                signal_ratio = np.inf
            else:
                signal_ratio = np.nan

            strategy_performance.append({
                'strategy_name': strategy['strategy_name'],
                'strategy_type': strategy['strategy_type'],
                'symbol': symbol,
                'total_signals': total_signals,
                'long_signals': long_signals,
                'short_signals': short_signals,
                'signal_ratio': signal_ratio
            })

# Always (re)define perf_df so later cells never see an undefined or stale frame
perf_df = pd.DataFrame(strategy_performance)

if strategy_performance:
    print("\n📊 Strategy Performance Summary:")
    display(perf_df.sort_values('total_signals', ascending=False))
else:
    print("❌ No performance data calculated")

## 6. Strategy Comparison

Compare different parameter configurations for the same strategy type.

In [None]:
# Group strategies by type and analyze parameter variations
if not strategy_index.empty:
    for strategy_type in strategy_index['strategy_type'].unique()[:3]:  # First 3 strategy types in index order
        type_strategies = strategy_index[strategy_index['strategy_type'] == strategy_type]

        if len(type_strategies) > 1:
            print(f"\n📊 Analyzing {strategy_type} variations ({len(type_strategies)} configurations)")

            # Parse parameters
            param_data = []
            for _, row in type_strategies.iterrows():
                raw_params = row['parameters']
                try:
                    parsed = json.loads(raw_params) if isinstance(raw_params, str) else raw_params
                    # Copy into a fresh dict so the extra keys added below do not
                    # leak into the parameter objects stored in strategy_index.
                    params = dict(parsed)
                except (TypeError, ValueError):
                    # Missing, malformed or non-mapping parameters: skip this configuration
                    continue
                params['total_signals'] = row['total_signals']
                params['strategy_hash'] = row['strategy_hash']
                param_data.append(params)

            if param_data:
                param_df = pd.DataFrame(param_data)
                # Show parameter distribution
                numeric_cols = param_df.select_dtypes(include=[np.number]).columns
                if len(numeric_cols) > 1:
                    display(param_df[numeric_cols].describe())

## 7. Export Results

Export selected strategies and signals for further analysis.

In [None]:
# Create export directory
export_dir = Path('research_exports')
export_dir.mkdir(exist_ok=True)

# Take one timestamp for the whole cell so files exported together share a suffix
# (two separate now() calls can straddle a second boundary).
export_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Export top strategies
if not top_strategies.empty:
    export_file = export_dir / f'top_strategies_{export_stamp}.csv'
    top_strategies.to_csv(export_file, index=False)
    print(f"✅ Exported top strategies to: {export_file}")

# Export strategy performance (perf_df may not exist if the metrics cell produced nothing)
if 'perf_df' in locals() and not perf_df.empty:
    export_file = export_dir / f'strategy_performance_{export_stamp}.csv'
    perf_df.to_csv(export_file, index=False)
    print(f"✅ Exported performance data to: {export_file}")

print(f"\n📁 All exports saved to: {export_dir.absolute()}")

## 8. Advanced Queries

Examples of advanced queries on the signal database.

In [None]:
# Example: Find strategies with specific parameter ranges
def find_strategies_by_params(strategy_type, param_filters):
    """Find strategies of one type whose parameters fall inside given ranges.

    Reads the notebook-level ``strategy_index`` frame.

    Args:
        strategy_type: Value to match against the ``'strategy_type'`` column.
        param_filters: Mapping of parameter name to an inclusive
            ``(min_val, max_val)`` pair. A strategy matches only if every
            named parameter is present and within its range.

    Returns:
        The matching rows of ``strategy_index`` (original index, columns and
        dtypes preserved). Rows whose parameters cannot be parsed or compared
        are skipped. The result is empty when nothing matches or when the
        index lacks the ``'strategy_type'`` / ``'parameters'`` columns.
    """
    # A column-less index (e.g. no index file was found) cannot be queried
    if not {'strategy_type', 'parameters'}.issubset(strategy_index.columns):
        return pd.DataFrame()

    type_strategies = strategy_index[strategy_index['strategy_type'] == strategy_type]

    def _matches(raw_params):
        """Return True when one row's parameters satisfy every filter."""
        try:
            params = json.loads(raw_params) if isinstance(raw_params, str) else raw_params
            return all(
                param in params and min_val <= params[param] <= max_val
                for param, (min_val, max_val) in param_filters.items()
            )
        except (TypeError, ValueError):
            # Malformed JSON, missing parameters, or values that cannot be
            # range-compared: treat the row as a non-match rather than fail.
            return False

    matching_positions = [
        position
        for position, raw_params in enumerate(type_strategies['parameters'])
        if _matches(raw_params)
    ]
    return type_strategies.iloc[matching_positions]

# Example usage (skipped when the index has no 'strategy_type' column, e.g. no index file was found)
if 'strategy_type' in strategy_index.columns and 'sma_crossover' in strategy_index['strategy_type'].values:
    print("\n🔍 Finding SMA crossover strategies with fast period 10-20:")
    results = find_strategies_by_params('sma_crossover', {
        'fast_period': (10, 20)
    })
    if not results.empty:
        display(results[['strategy_name', 'parameters', 'total_signals']])

In [None]:
# Summary statistics
print("\n📊 Research Summary:")
print(f"- Total strategies in database: {len(strategy_index)}")
if strategy_index.empty:
    # No index was loaded (or it has no rows); the column lookups below would fail
    print("- No strategy index loaded; nothing else to summarize")
else:
    print(f"- Strategy types: {len(strategy_index['strategy_type'].unique())}")
    print(f"- Date range: {strategy_index['start_date'].min()} to {strategy_index['end_date'].max()}")
    print(f"- Total signals: {strategy_index['total_signals'].sum():,}")
print(f"\n✅ Research notebook complete!")