# Bollinger Bands Parameter Analysis

This notebook analyzes the results of your Bollinger Bands parameter sweep (1640 combinations tested).

## 1. Setup and Import Libraries

In [None]:
import duckdb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import re

# Set up plotting style
# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*';
# fall back to the legacy 'seaborn' name so the notebook also runs on older installs
# instead of failing with OSError on the first cell.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette("husl")

## 2. Connect to DuckDB and Load Trace Files

In [None]:
# Open an in-memory DuckDB session; it lives only as long as this kernel
con = duckdb.connect()

# Directory that holds the parquet trace files of the sweep
results_path = Path('../results/latest/traces/bollinger_bands')

# Expose every parquet file in that directory through a single `traces` view
create_view_sql = f"""
    CREATE VIEW traces AS 
    SELECT * FROM read_parquet('{results_path}/*.parquet')
"""
con.execute(create_view_sql)

# Count distinct trace files by their base name (path prefix stripped).
# NOTE(review): this and later queries read a `filename` column through the view;
# presumably the trace files carry that column themselves -- confirm, since
# `SELECT *` over read_parquet() does not add one on its own.
count_files_sql = "SELECT COUNT(DISTINCT filename) FROM (SELECT regexp_extract(filename, '[^/]+$') as filename FROM traces)"
(trace_count,) = con.execute(count_files_sql).fetchone()
print(f"Total strategy variations loaded: {trace_count}")

# Show the first rows so the column layout is visible in the cell output
print("\nData structure:")
con.execute("SELECT * FROM traces LIMIT 5").df()

## 3. Extract Parameters from Filenames

In [None]:
# Function to extract strategy ID from filename
def extract_strategy_info(filename):
    """Return the numeric strategy ID embedded in a trace filename.

    Searches ``filename`` for ``compiled_strategy_<digits>`` (anywhere in the
    string) and returns the digits as an ``int``. Returns ``None`` when the
    pattern does not occur.
    """
    found = re.search(r'compiled_strategy_(\d+)', filename)
    return int(found.group(1)) if found else None

# Rebuild the sweep's parameter grid so strategy IDs can be mapped back to parameters.
# NOTE(review): the ID ordering (period-major, std_dev varying fastest) and the grid
# itself are reverse engineered -- this yields 40 x 8 = 320 combinations; confirm
# against the actual sweep configuration.
periods = list(range(10, 50, 1))  # 40 values
std_devs = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]  # 8 values

# Enumerate the grid in nested-loop order; the position in that order is the strategy ID
param_grid = [(p, s) for p in periods for s in std_devs]
param_mapping = {
    sid: {'period': p, 'std_dev': s, 'strategy_id': sid}
    for sid, (p, s) in enumerate(param_grid)
}

# One row per strategy ID, ready to be joined onto per-strategy statistics
param_df = pd.DataFrame.from_dict(param_mapping, orient='index')
print(f"Generated {len(param_df)} parameter combinations")
param_df.head(10)

## 4. Analyze Signal Frequency and Characteristics

In [None]:
# Count signals per strategy.
# The strategy ID is parsed out of each row's filename; only rows with a non-zero
# signal are counted, so strategies that never signal do not appear in the result.
signal_stats = con.execute("""
    WITH file_info AS (
        SELECT 
            *,
            CAST(regexp_extract(filename, 'compiled_strategy_(\\d+)', 1) AS INT) as strategy_id
        FROM traces
    ),
    signal_changes AS (
        SELECT 
            strategy_id,
            COUNT(*) as num_signals,
            COUNT(DISTINCT DATE(timestamp)) as trading_days,
            MIN(timestamp) as first_signal,
            MAX(timestamp) as last_signal
        FROM file_info
        WHERE signal != 0  -- Only count actual signals
        GROUP BY strategy_id
    )
    SELECT * FROM signal_changes
    ORDER BY strategy_id
""").df()

# Merge with parameters
signal_stats = signal_stats.merge(param_df, on='strategy_id', how='left')

# A left merge leaves period/std_dev as NaN for any strategy ID that is missing from
# the reconstructed grid. Surface that here: otherwise it only shows up later as a
# confusing pivot error or as strategies silently missing from the heatmaps.
unmapped_ids = signal_stats.loc[signal_stats['period'].isna(), 'strategy_id']
if len(unmapped_ids) > 0:
    print(f"WARNING: {len(unmapped_ids)} of {len(signal_stats)} strategies have no entry in param_df "
          f"(e.g. IDs {unmapped_ids.head(5).tolist()}); check the parameter grid in section 3.")

# Calculate additional metrics.
# trading_days counts only days on which the strategy emitted a signal, so both
# ratios are relative to active days, not to all days in the data.
signal_stats['signals_per_day'] = signal_stats['num_signals'] / signal_stats['trading_days']
signal_stats['avg_days_between_signals'] = signal_stats['trading_days'] / signal_stats['num_signals']

print("Signal statistics summary:")
print(signal_stats.describe())

## 5. Create Parameter Heatmaps

In [None]:
# Pivot data for the heatmaps (rows: period, columns: std_dev)
signals_pivot = signal_stats.pivot(index='period', columns='std_dev', values='signals_per_day')
total_signals_pivot = signal_stats.pivot(index='period', columns='std_dev', values='num_signals')

# Only two heatmaps are drawn, so use a 1x2 grid; the previous 2x2 grid left the
# whole bottom row as empty axes.
fig, axes = plt.subplots(1, 2, figsize=(15, 12))

# Heatmap 1: Signals per day
sns.heatmap(signals_pivot, annot=True, fmt='.2f', cmap='viridis', ax=axes[0])
axes[0].set_title('Average Signals Per Day')
axes[0].set_xlabel('Standard Deviation')
axes[0].set_ylabel('Period')

# Heatmap 2: Total number of signals (labelled like the first panel)
sns.heatmap(total_signals_pivot, annot=False, cmap='plasma', ax=axes[1])
axes[1].set_title('Total Number of Signals')
axes[1].set_xlabel('Standard Deviation')
axes[1].set_ylabel('Period')

# Find optimal signal frequency (not too many, not too few)
# Target: 1-3 signals per day
optimal_freq = signal_stats[(signal_stats['signals_per_day'] >= 1) & 
                           (signal_stats['signals_per_day'] <= 3)]
print(f"\nStrategies with optimal signal frequency (1-3 per day): {len(optimal_freq)}")
print("\nTop 10 by signal frequency balance:")
# Lists the ten in-range strategies with the fewest signals per day
print(optimal_freq.nsmallest(10, 'signals_per_day')[['period', 'std_dev', 'signals_per_day', 'num_signals']])

plt.tight_layout()
plt.show()

## 6. Backtest Performance Analysis

To calculate actual performance metrics, we need to merge with market data:

In [None]:
# Load market data
# Price bars used by calculate_performance() below, which reads the `timestamp`,
# `symbol` and `close` columns of this frame.
# NOTE(review): the path is relative to the notebook's working directory -- confirm
# it resolves from wherever this notebook is run.
market_data_path = '../../../../data/SPY_5m.parquet'  # Adjust path as needed
market_data = pd.read_parquet(market_data_path)
print(f"Market data shape: {market_data.shape}")

# Function to calculate performance for a specific strategy
def calculate_performance(strategy_id, param_row):
    """Backtest one strategy's signals against the loaded market data.

    Reads the strategy's rows from the ``traces`` view (through the notebook-level
    ``con``), left-joins them onto ``market_data`` by timestamp and symbol, carries
    each signal forward until the next one, and applies the previous bar's signal
    to the current bar's close-to-close return.

    Args:
        strategy_id: Numeric ID as encoded in the trace filename
            (``compiled_strategy_<id>``).
        param_row: Mapping or Series providing ``'period'`` and ``'std_dev'``;
            both are copied into the result.

    Returns:
        A dict with keys ``strategy_id``, ``total_return``, ``sharpe_ratio``,
        ``max_drawdown``, ``win_rate``, ``period`` and ``std_dev``, or ``None``
        when the strategy has no rows in ``traces``. ``sharpe_ratio`` and
        ``win_rate`` are NaN when undefined (zero return variance / no bar with
        a non-zero strategy return).
    """
    # Bind the ID as a query parameter rather than formatting it into the SQL text;
    # int() also normalises numpy/float IDs coming out of DataFrame rows.
    signals = con.execute("""
        SELECT timestamp, symbol, signal
        FROM traces
        WHERE CAST(regexp_extract(filename, 'compiled_strategy_(\\d+)', 1) AS INT) = ?
        ORDER BY timestamp
    """, [int(strategy_id)]).df()

    if len(signals) == 0:
        return None

    # Merge with market data: bars without a trace row inherit the last seen signal,
    # bars before the first signal are flat (0).
    df = market_data.merge(signals, on=['timestamp', 'symbol'], how='left')
    df['signal'] = df['signal'].ffill().fillna(0)  # .ffill() replaces deprecated fillna(method='ffill')

    # Calculate returns; shift(1) applies the signal known at the previous bar
    df['returns'] = df['close'].pct_change()
    df['strategy_returns'] = df['returns'] * df['signal'].shift(1)
    df['cum_returns'] = (1 + df['strategy_returns']).cumprod()

    # Calculate metrics
    total_return = df['cum_returns'].iloc[-1] - 1

    # Annualisation factor 252 * 78 matches the original "5min bars" assumption.
    # A zero (or undefined) standard deviation would yield inf/NaN and let a flat
    # strategy rank as the "best" Sharpe, so report NaN instead.
    returns_std = df['strategy_returns'].std()
    if returns_std > 0:
        sharpe = df['strategy_returns'].mean() / returns_std * np.sqrt(252 * 78)  # 5min bars
    else:
        sharpe = np.nan

    # Largest peak-to-trough decline of the cumulative return curve (<= 0)
    max_dd = (df['cum_returns'] / df['cum_returns'].expanding().max() - 1).min()

    # Share of bars with a positive strategy return among bars with a non-zero one
    winning_bars = df[df['strategy_returns'] > 0]['strategy_returns'].count()
    active_bars = df[df['strategy_returns'] != 0]['strategy_returns'].count()
    win_rate = winning_bars / active_bars if active_bars > 0 else np.nan

    return {
        'strategy_id': strategy_id,
        'total_return': total_return,
        'sharpe_ratio': sharpe,
        'max_drawdown': max_dd,
        'win_rate': win_rate,
        'period': param_row['period'],
        'std_dev': param_row['std_dev']
    }

# Backtest a subset of the optimal-frequency strategies.
# Note: head(20) takes the first 20 rows of optimal_freq in its existing order;
# it does not rank the candidates.
print("Calculating performance metrics for optimal frequency strategies...")
candidates = optimal_freq.head(20)  # Test top 20 candidates
outcomes = (
    calculate_performance(candidate['strategy_id'], candidate)
    for _, candidate in candidates.iterrows()
)
# Keep only strategies that produced a result (None means no signals were found)
performance_results = [outcome for outcome in outcomes if outcome]

performance_df = pd.DataFrame(performance_results)

## 7. Find Best Performers

In [None]:
# Rank the backtested strategies by Sharpe ratio and by total return
best_by_sharpe = performance_df.nlargest(10, 'sharpe_ratio')
best_by_return = performance_df.nlargest(10, 'total_return')

# Each leaderboard prints the same metrics, with its ranking metric listed first
leaderboards = (
    ("Top 10 strategies by Sharpe Ratio:", best_by_sharpe,
     ['period', 'std_dev', 'sharpe_ratio', 'total_return', 'max_drawdown', 'win_rate']),
    ("\nTop 10 strategies by Total Return:", best_by_return,
     ['period', 'std_dev', 'total_return', 'sharpe_ratio', 'max_drawdown', 'win_rate']),
)
for heading, table, shown_columns in leaderboards:
    print(heading)
    print(table[shown_columns])

# Balanced performers: Sharpe above 1.0 and a drawdown shallower than 10%
good_sharpe = performance_df['sharpe_ratio'] > 1.0
shallow_drawdown = performance_df['max_drawdown'] > -0.10
balanced = performance_df[good_sharpe & shallow_drawdown]
print(f"\nBalanced strategies (Sharpe > 1.0, Max DD > -10%): {len(balanced)}")
if len(balanced):
    print(balanced.head())

## 8. Parameter Stability Analysis

In [None]:
# Parameter stability: how do the backtest metrics vary along each parameter axis?
# Regions where the metrics change little are less likely to be overfit.

# Mean and spread of each metric per period
period_stats = performance_df.groupby('period').agg({
    metric: ['mean', 'std']
    for metric in ('sharpe_ratio', 'total_return', 'max_drawdown')
}).round(3)

# Stability score: mean Sharpe relative to its spread (higher = steadier)
sharpe_mean = period_stats[('sharpe_ratio', 'mean')]
sharpe_spread = period_stats[('sharpe_ratio', 'std')]
period_stats['sharpe_stability'] = sharpe_mean / sharpe_spread
stable_periods = period_stats.nlargest(5, 'sharpe_stability')
print("Most stable periods (high mean/std ratio for Sharpe):")
print(stable_periods)

# Same summary along the std_dev axis
std_stats = performance_df.groupby('std_dev').agg({
    metric: ['mean', 'std']
    for metric in ('sharpe_ratio', 'total_return')
}).round(3)

# Plot parameter sensitivity: one panel per (parameter, metric) pair
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

sensitivity_panels = [
    # (axis, group key, metric, title, x label, y label)
    (axes[0, 0], 'period', 'sharpe_ratio', 'Average Sharpe Ratio by Period', 'Period', 'Sharpe Ratio'),
    (axes[0, 1], 'std_dev', 'sharpe_ratio', 'Average Sharpe Ratio by Std Dev', 'Standard Deviation', 'Sharpe Ratio'),
    (axes[1, 0], 'period', 'total_return', 'Average Total Return by Period', 'Period', 'Total Return'),
    (axes[1, 1], 'std_dev', 'max_drawdown', 'Average Max Drawdown by Std Dev', 'Standard Deviation', 'Max Drawdown'),
]
for panel_ax, group_key, metric, title, x_label, y_label in sensitivity_panels:
    performance_df.groupby(group_key)[metric].mean().plot(ax=panel_ax)
    panel_ax.set_title(title)
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel(y_label)

plt.tight_layout()
plt.show()

## 9. Final Recommendations

In [None]:
# Create final recommendations
print("="*60)
print("BOLLINGER BANDS PARAMETER RECOMMENDATIONS")
print("="*60)

# Best overall performer: the backtested strategy with the highest Sharpe ratio
best_overall = performance_df.loc[performance_df['sharpe_ratio'].idxmax()]
print("\n1. BEST OVERALL PERFORMER:")
print(f"   Period: {best_overall['period']}")
print(f"   Std Dev: {best_overall['std_dev']}")
print(f"   Sharpe Ratio: {best_overall['sharpe_ratio']:.2f}")
print(f"   Total Return: {best_overall['total_return']:.1%}")
print(f"   Max Drawdown: {best_overall['max_drawdown']:.1%}")

# Most robust region
robust_region = performance_df[
    (performance_df['period'].between(18, 25)) & 
    (performance_df['std_dev'].between(1.5, 2.5))
]
print("\n2. MOST ROBUST PARAMETERS (stable region):")
# Only a subset of strategies is backtested, so this window can be empty;
# idxmax() on an empty selection raises ValueError, hence the guard
# (same pattern as the conservative choice below).
if len(robust_region) > 0:
    robust_best = robust_region.loc[robust_region['sharpe_ratio'].idxmax()]
    print(f"   Period: {robust_best['period']}")
    print(f"   Std Dev: {robust_best['std_dev']}")
    print(f"   Sharpe Ratio: {robust_best['sharpe_ratio']:.2f}")
    print("   (from region: period 18-25, std 1.5-2.5)")
else:
    robust_best = None
    print("   No backtested strategy falls in region: period 18-25, std 1.5-2.5")

# Conservative choice: best Sharpe among strategies whose drawdown stayed within 5%
conservative = performance_df[performance_df['max_drawdown'] > -0.05]
if len(conservative) > 0:
    conservative_best = conservative.loc[conservative['sharpe_ratio'].idxmax()]
    print("\n3. CONSERVATIVE CHOICE (max DD < 5%):")
    print(f"   Period: {conservative_best['period']}")
    print(f"   Std Dev: {conservative_best['std_dev']}")
    print(f"   Sharpe Ratio: {conservative_best['sharpe_ratio']:.2f}")
    print(f"   Max Drawdown: {conservative_best['max_drawdown']:.1%}")

# Save recommendations.
# Values are cast to built-in int/float because json cannot serialise numpy scalars.
# 'robust_choice' is written as null when no strategy fell inside the robust region.
recommendations = {
    'best_overall': {
        'period': int(best_overall['period']),
        'std_dev': float(best_overall['std_dev']),
        'sharpe_ratio': float(best_overall['sharpe_ratio']),
        'total_return': float(best_overall['total_return']),
        'max_drawdown': float(best_overall['max_drawdown'])
    },
    'robust_choice': None if robust_best is None else {
        'period': int(robust_best['period']),
        'std_dev': float(robust_best['std_dev']),
        'sharpe_ratio': float(robust_best['sharpe_ratio'])
    }
}

with open('../bollinger_recommendations.json', 'w') as f:
    json.dump(recommendations, f, indent=2)
print("\n✅ Recommendations saved to bollinger_recommendations.json")

## 10. Production Config Generation

In [None]:
# Render a production config from the best-Sharpe strategy's parameters.
# Cast to built-in types first so the YAML shows plain numbers.
best_period = int(best_overall['period'])
best_std_dev = float(best_overall['std_dev'])

production_config = f"""
name: bollinger_production
data: SPY_5m

strategy:
  bollinger_bands:
    period: {best_period}
    std_dev: {best_std_dev}
    threshold: "intraday"
"""

# Write the config one directory above the notebook's working directory
with open('../bollinger_production.yaml', 'w') as f:
    f.write(production_config)

print("\n✅ Production config saved to bollinger_production.yaml")
print("\nProduction config:")
print(production_config)