# Component Optimization Analysis

Comprehensive analysis of the production parameter optimization results using reusable analysis tools.

This notebook analyzes the optimal parameters generated from the production optimization run, providing insights into parameter selection, performance metrics, and optimization effectiveness.

In [None]:
# Setup and imports
import sys
import os
sys.path.append(os.path.join(os.getcwd(), '..'))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Analysis modules
from src.analysis.parameter_analysis import (
    ParameterAnalyzer, 
    load_parameters_from_yaml,
    analyze_parameter_file
)
from src.visualization.optimization_analysis import (
    OptimizationVisualizer,
    create_optimization_summary
)
from src.analysis.optimization_statistics import (
    OptimizationStatistics,
    calculate_statistical_power
)

# Plot styling shared by every figure in the notebook
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Set both pandas display limits (column count and output width) to None
for display_option in ('display.max_columns', 'display.width'):
    pd.set_option(display_option, None)

print("✅ Analysis tools loaded successfully")

## 1. Load Production Optimization Results

In [None]:
# Load the production optimization results
# Reads the optimal-parameter YAML, builds the ParameterAnalyzer and its
# summary DataFrame, then reports how many parameter sets succeeded or failed.
# Any failure is reported and re-raised so later cells never run on missing data.
optimal_params_path = '../config/optimal_parameters.yaml'

try:
    # Load parameters using our analysis tools
    optimal_params = load_parameters_from_yaml(optimal_params_path)
    print(f"✅ Loaded optimal parameters from {optimal_params_path}")

    # Initialize analyzer
    analyzer = ParameterAnalyzer(optimal_params)
    print("✅ Parameter analyzer initialized")

    # Get summary
    summary_df = analyzer.create_parameter_summary()
    total_count = len(summary_df)
    print(f"✅ Parameter summary created: {total_count} parameter sets")

    # Tally outcomes; anything that is neither 'failed' nor 'success'
    # (including missing values) is counted as "other"
    status_column = summary_df['optimization_status']
    failed_count = int((status_column == 'failed').sum())
    success_count = int((status_column == 'success').sum())
    other_count = total_count - failed_count - success_count

    if failed_count > 0:
        print(f"⚠️ Warning: {failed_count} parameter sets failed optimization")
        print(f"📊 Status: {success_count} successful, {failed_count} failed")
        print("\nNote: This analysis will demonstrate the framework with the available parameter structure")
        print("The optimization appears to have failed - this analysis shows the framework capabilities")

    if other_count > 0:
        # Previously these rows were silently ignored and the "all successful"
        # message could be printed even though some sets never succeeded.
        print(f"⚠️ Warning: {other_count} parameter sets have a status other than 'success' or 'failed'")

    if total_count == 0:
        # An empty summary must not be reported as a fully successful run
        print("⚠️ Warning: parameter summary is empty - no parameter sets to analyze")
    elif failed_count == 0 and other_count == 0:
        print(f"✅ All {success_count} parameter sets optimized successfully")

except FileNotFoundError:
    print("❌ Optimal parameters file not found")
    print("Please ensure the production optimization has been run")
    raise
except Exception as e:
    print(f"❌ Error loading parameters: {e}")
    raise

## 2. Parameter Summary Overview

In [None]:
# High-level overview of the parameter summary table
print("🔍 PARAMETER SUMMARY OVERVIEW")
print("=" * 50)

# How many parameter sets each component contributes
component_counts = summary_df['component'].value_counts()
print("\nComponent Distribution:")
for comp_name, n_sets in component_counts.items():
    comp_label = comp_name.replace('_', ' ').title()
    print(f"  {comp_label}: {n_sets} parameter sets")

# Distinct exposures, leaving out rows whose exposure_id is 'ALL'
is_specific_exposure = summary_df['exposure_id'] != 'ALL'
unique_exposures = summary_df[is_specific_exposure]['exposure_id'].nunique()
print(f"\nUnique Exposures: {unique_exposures}")

# First ten rows of the columns of interest, printed as plain text
print("\n📊 Sample Parameter Data:")
display_cols = ['exposure_id', 'component', 'method', 'lookback_days', 'frequency', 'score']
sample_rows = summary_df[display_cols].head(10)
print(sample_rows.to_string(index=False))

In [None]:
# Per-component breakdown of which methods the optimizer selected
method_dist = analyzer.get_method_distribution()

print("\n🔍 METHOD DISTRIBUTION ANALYSIS")
print("=" * 50)

for comp_name, method_counts in method_dist.items():
    print(f"\n{comp_name.replace('_', ' ').title()}:")
    # Denominator for the percentage column: all selections within this component
    component_total = sum(method_counts.values())
    for method_name, n_selected in method_counts.items():
        share = (n_selected / component_total) * 100
        print(f"  {method_name}: {n_selected} ({share:.1f}%)")

In [None]:
# Lookback statistics as returned by the analyzer
lookback_stats = analyzer.get_lookback_statistics()

print("\n📈 LOOKBACK PERIOD ANALYSIS")
print("=" * 50)

# Transpose the frame built from the stats mapping before printing
lookback_df = pd.DataFrame(lookback_stats).T
if lookback_df.empty:
    print("No lookback data available")
else:
    rounded_lookbacks = lookback_df.round(1)
    print(rounded_lookbacks.to_string())

## 3. Visualization Analysis

In [None]:
# Build the visualizer on top of the parameter analyzer
visualizer = OptimizationVisualizer(analyzer)

# Method distribution figure; the super-title is applied through pyplot
method_plot_title = 'Production Optimization: Method Selection by Component'
fig_methods = visualizer.plot_method_distribution(figsize=(14, 6))
plt.suptitle(method_plot_title, fontsize=16, y=1.02)
plt.show()

print("✅ Method distribution visualization complete")

In [None]:
# Lookback distribution figure; the super-title is applied through pyplot
lookback_plot_title = 'Production Optimization: Lookback Period Distribution'
fig_lookback = visualizer.plot_lookback_distribution(figsize=(14, 10))
plt.suptitle(lookback_plot_title, fontsize=16, y=0.98)
plt.show()

print("✅ Lookback distribution visualization complete")

In [None]:
# Score analysis figure; the super-title is applied through pyplot
score_plot_title = 'Production Optimization: Score Analysis'
fig_scores = visualizer.plot_score_analysis(figsize=(14, 10))
plt.suptitle(score_plot_title, fontsize=16, y=0.98)
plt.show()

print("✅ Score analysis visualization complete")

In [None]:
# Parameter heatmap drawn by the visualizer at a fixed figure size
heatmap_figsize = (16, 12)
fig_heatmap = visualizer.plot_parameter_heatmap(figsize=heatmap_figsize)
plt.show()

print("✅ Parameter heatmap visualization complete")

## 4. Interactive Dashboard

In [None]:
# Create interactive dashboard
# The visualizer builds the dashboard object; it is rendered by calling the
# object's own show() method.
interactive_fig = visualizer.create_interactive_dashboard()
interactive_fig.show()

# Confirmation printed once show() has returned
print("✅ Interactive dashboard created")

## 5. Statistical Analysis

In [None]:
# Statistical analyzer wrapping the parameter analyzer
report_confidence_level = 0.95
stats_analyzer = OptimizationStatistics(analyzer, confidence_level=report_confidence_level)

print("🔬 STATISTICAL ANALYSIS")
print("=" * 50)

# Full statistical report; later cells read its entries by key
statistical_report = stats_analyzer.generate_statistical_report()

print("✅ Statistical analysis complete")

In [None]:
# Component comparison section of the statistical report
component_comparison = statistical_report['component_comparison']

print("\n📊 COMPONENT PERFORMANCE COMPARISON")
print("=" * 50)

# Per-component summary table (one row per component after transposing)
if 'component_summaries' in component_comparison:
    component_summaries = component_comparison['component_summaries']
    comp_summary_df = pd.DataFrame(component_summaries).T
    print(comp_summary_df.round(4).to_string())

# ANOVA results are shown only when present and not flagged with an 'error' key
anova_is_usable = (
    'anova_result' in component_comparison
    and 'error' not in component_comparison['anova_result']
)
if anova_is_usable:
    anova = component_comparison['anova_result']
    print("\n🔬 ANOVA Test Results:")
    print(f"  F-statistic: {anova['f_statistic']:.4f}")
    print(f"  P-value: {anova['p_value']:.6f}")
    print(f"  Significant: {anova['significant']}")
    print(f"  Effect size (η²): {anova['eta_squared']:.4f}")
    print(f"  Interpretation: {anova['interpretation']}")

In [None]:
# Robustness section of the statistical report, printed per component
print("\n🛡️ ROBUSTNESS ANALYSIS")
print("=" * 50)

robustness_results = statistical_report['robustness_analysis']

for comp_name, comp_robustness in robustness_results.items():
    # Entries without a robustness score are not reported at all
    if 'robustness_score' not in comp_robustness:
        continue

    print(f"\n{comp_name.replace('_', ' ').title()}:")
    print(f"  Robustness Score: {comp_robustness['robustness_score']:.0f}/100")
    print(f"  Interpretation: {comp_robustness['interpretation']}")

    # Optional detail: spread of selected methods
    if 'method_diversity' in comp_robustness:
        method_diversity = comp_robustness['method_diversity']
        print(f"  Method Diversity: {method_diversity['unique_methods']} unique methods")
        print(f"  Dominant Method: {method_diversity['dominant_method']} ({method_diversity['dominant_method_pct']:.1f}%)")

    # Optional detail: shape of the score distribution
    if 'score_robustness' in comp_robustness:
        score_detail = comp_robustness['score_robustness']
        print(f"  Score Range: {score_detail['score_range']:.6f}")
        print(f"  Score Skewness: {score_detail['score_skewness']:.3f}")

## 6. Parameter Consistency Analysis

In [None]:
# Consistency of methods, frequencies and lookbacks within each component
consistency = analyzer.get_parameter_consistency()

print("🎯 PARAMETER CONSISTENCY ANALYSIS")
print("=" * 50)

consistency_df = pd.DataFrame(consistency).T
if consistency_df.empty:
    print("No consistency data available")
else:
    print(consistency_df.to_string())

    print("\n📋 Consistency Summary:")
    for comp_name, comp_metrics in consistency.items():
        print(f"\n{comp_name.replace('_', ' ').title()}:")

        # Method agreement
        if not comp_metrics['method_consistency']:
            print(f"  ⚠️ Methods inconsistent (dominant: {comp_metrics['dominant_method']})")
        else:
            print(f"  ✅ Methods consistent: {comp_metrics['dominant_method']}")

        # Frequency agreement
        if not comp_metrics['frequency_consistency']:
            print(f"  ⚠️ Frequencies inconsistent (dominant: {comp_metrics['dominant_frequency']})")
        else:
            print(f"  ✅ Frequencies consistent: {comp_metrics['dominant_frequency']}")

        # Lookback stability, bucketed by the reported CV; skipped when it is NaN
        if pd.isna(comp_metrics['lookback_cv']):
            continue
        lookback_cv = comp_metrics['lookback_cv']
        if lookback_cv < 0.1:
            print(f"  ✅ Lookback periods very stable (CV: {lookback_cv:.3f})")
        elif lookback_cv < 0.3:
            print(f"  📊 Lookback periods moderately stable (CV: {lookback_cv:.3f})")
        else:
            print(f"  ⚠️ Lookback periods variable (CV: {lookback_cv:.3f})")

## 7. Exposure-Specific Analysis

In [None]:
# Side-by-side parameter comparison for a fixed short list of exposures
key_exposures = ['us_large_equity', 'dynamic_global_bonds', 'commodities', 'real_estate']

print("🎯 EXPOSURE-SPECIFIC ANALYSIS")
print("=" * 50)

# Keep only the key exposures that actually appear in the summary
available_exposures = summary_df['exposure_id'].unique()
analysis_exposures = [exp for exp in key_exposures if exp in available_exposures]

if not analysis_exposures:
    print("Key exposures not found in optimization results")
    print(f"Available exposures: {list(available_exposures)}")
else:
    comparison_df = analyzer.compare_exposure_parameters(analysis_exposures)

    print(f"\nComparing parameters for: {', '.join(analysis_exposures)}")
    print("\n📊 Parameter Comparison:")

    # Print one sub-table per metric found in the first column level
    top_level_columns = comparison_df.columns.get_level_values(0)
    for metric in ('method', 'lookback_days', 'frequency', 'score'):
        if metric not in top_level_columns:
            continue
        print(f"\n{metric.replace('_', ' ').title()}:")
        metric_data = comparison_df[metric]
        print(metric_data.to_string())

## 8. Outlier Detection

In [None]:
# Outlier detection on scores and lookback periods, per component
print("🔍 OUTLIER DETECTION")
print("=" * 50)

outlier_results = {}

for component in ('volatility', 'expected_returns'):
    print(f"\n{component.replace('_', ' ').title()} Component:")

    score_outliers = analyzer.identify_outliers(component, 'score')
    lookback_outliers = analyzer.identify_outliers(component, 'lookback_days')

    # Stored so the summary cell can reuse the counts
    outlier_results[component] = dict(
        score_outliers=score_outliers,
        lookback_outliers=lookback_outliers,
    )

    if not score_outliers:
        print("  ✅ No score outliers detected")
    else:
        print(f"  📊 Score outliers: {', '.join(score_outliers)}")

    if not lookback_outliers:
        print("  ✅ No lookback outliers detected")
    else:
        print(f"  📈 Lookback outliers: {', '.join(lookback_outliers)}")

# Grand total across both components and both outlier kinds
total_outliers = 0
for component_outliers in outlier_results.values():
    total_outliers += len(component_outliers['score_outliers']) + len(component_outliers['lookback_outliers'])
print(f"\n📋 Total outliers detected: {total_outliers}")

## 9. Optimization Insights and Recommendations

In [None]:
# Insights from the parameter analyzer plus recommendations from the statistical report
insights = analyzer.generate_optimization_insights()

print("💡 OPTIMIZATION INSIGHTS & RECOMMENDATIONS")
print("=" * 60)

# Analyzer recommendations as a numbered list, or an all-clear message when there are none
if not insights['recommendations']:
    print("\n✅ No specific recommendations - optimization appears well-configured")
else:
    print("\n🎯 Key Recommendations:")
    for position, recommendation in enumerate(insights['recommendations'], start=1):
        print(f"  {position}. {recommendation}")

# Statistical recommendations are printed only when there are any
if statistical_report['recommendations']:
    print("\n🔬 Statistical Recommendations:")
    for position, recommendation in enumerate(statistical_report['recommendations'], start=1):
        print(f"  {position}. {recommendation}")

## 10. Summary Report

In [None]:
# Create comprehensive summary
# Condenses earlier results into one text report. Besides `summary_df`, this
# cell reads `robustness_results` (robustness analysis cell) and
# `total_outliers` (outlier detection cell), so the notebook must be run top
# to bottom before this cell.
print("📋 PRODUCTION OPTIMIZATION SUMMARY REPORT")
print("=" * 60)

# Basic statistics
total_params = len(summary_df)
unique_exposures = summary_df[summary_df['exposure_id'] != 'ALL']['exposure_id'].nunique()
components = summary_df['component'].nunique()

print("\n📊 Optimization Overview:")
print(f"  Total parameter sets: {total_params}")
print(f"  Unique exposures: {unique_exposures}")
print(f"  Components optimized: {components}")

# Method distribution summary (percentages are relative to all parameter sets)
print("\n🔧 Method Selection:")
all_methods = summary_df['method'].value_counts()
for method, count in all_methods.items():
    percentage = (count / len(summary_df)) * 100
    print(f"  {method}: {count} ({percentage:.1f}%)")

# Score summary (skipped when every score is missing)
scores = summary_df['score'].dropna()
if len(scores) > 0:
    print("\n📈 Score Analysis:")
    print(f"  Mean score: {scores.mean():.6f}")
    print(f"  Score range: {scores.min():.6f} - {scores.max():.6f}")
    print(f"  Score std dev: {scores.std():.6f}")

# Lookback summary (skipped when every lookback is missing)
lookbacks = summary_df['lookback_days'].dropna()
if len(lookbacks) > 0:
    print("\n📅 Lookback Period Analysis:")
    print(f"  Mean lookback: {lookbacks.mean():.1f} days")
    print(f"  Lookback range: {lookbacks.min():.0f} - {lookbacks.max():.0f} days")
    # mode() can return several values; the first one is reported
    print(f"  Most common: {lookbacks.mode().iloc[0]:.0f} days")

# Robustness summary: a component counts as highly robust at a score of 80+
robust_components = sum(1 for comp, rob in robustness_results.items()
                       if 'robustness_score' in rob and rob['robustness_score'] >= 80)
total_components = len(robustness_results)

print("\n🛡️ Robustness Assessment:")
print(f"  Highly robust components: {robust_components}/{total_components}")

# Final assessment
print("\n🎯 Overall Assessment:")
if total_components == 0:
    # Without this guard, 0 == 0 made the first branch report "EXCELLENT"
    # even though no robustness results exist to support that verdict.
    print("  ⚠️ INSUFFICIENT DATA: No robustness results available - overall assessment skipped")
elif total_outliers == 0 and robust_components == total_components:
    print("  ✅ EXCELLENT: Optimization is highly robust with consistent parameters")
elif total_outliers <= 2 and robust_components >= total_components * 0.8:
    print("  📊 GOOD: Optimization is generally robust with minor inconsistencies")
# NOTE(review): this tier uses `or` while the tiers above use `and` - confirm the leniency is intended
elif total_outliers <= 5 or robust_components >= total_components * 0.6:
    print("  ⚠️ MODERATE: Optimization shows some variability that may need attention")
else:
    print("  ❌ NEEDS REVIEW: Optimization shows significant variability across components")

print("\n" + "=" * 60)
print("Analysis complete! Review visualizations and recommendations above.")

## 11. Export Results

In [None]:
# Write the summary table, the insights and the figures to ../analysis_results
import json

export_path = '../analysis_results'
os.makedirs(export_path, exist_ok=True)

# Parameter summary as CSV (index column omitted)
summary_export_path = f'{export_path}/parameter_summary.csv'
summary_df.to_csv(summary_export_path, index=False)
print(f"📄 Parameter summary exported to: {summary_export_path}")

# Insights as JSON
insights_export_path = f'{export_path}/optimization_insights.json'
with open(insights_export_path, 'w') as json_file:
    # Drop the 'parameter_summary' entry (a DataFrame); any other value json
    # cannot encode natively is written as its str() form via default=str
    insights_serializable = {
        key: value
        for key, value in insights.items()
        if key != 'parameter_summary'
    }

    json.dump(insights_serializable, json_file, indent=2, default=str)
print(f"💡 Optimization insights exported to: {insights_export_path}")

# Figures are written by the visualizer into a sub-folder
viz_path = f'{export_path}/visualizations'
figures = visualizer.create_summary_report(viz_path)
print(f"📊 Visualizations saved to: {viz_path}/")

print("\n✅ All analysis results exported successfully!")