In [None]:
import sys
import os
sys.path.insert(0, os.path.abspath('..'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Project imports
from src.evaluation.analysis import ResultsAnalyzer
from src.ensemble.boosting_ensemble import BoostingEnsemble

# Plot defaults shared by every figure in this notebook: seaborn's
# white-grid theme and a wide default canvas.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

# Visible confirmation that the import/setup cell ran to completion.
print('Imports successful!')

## 1. Load Results

In [None]:
analyzer = ResultsAnalyzer(results_dir='../results')


def _load_or_empty(filename, loaded_msg, missing_msg):
    """Load one results file through ``analyzer``, tolerating its absence.

    Args:
        filename: Name of the results file handed to ``analyzer.load_results``.
        loaded_msg: Status line printed after a successful load.
        missing_msg: Status line printed when the file is not found.

    Returns:
        Whatever ``analyzer.load_results`` returns, or an empty dict when it
        raises ``FileNotFoundError``. Any other exception propagates.
    """
    try:
        loaded = analyzer.load_results(filename)
    except FileNotFoundError:
        print(missing_msg)
        return {}
    print(loaded_msg)
    return loaded


# Each result set falls back to {} so the later cells can simply test
# truthiness / membership instead of re-checking for missing files.
baseline = _load_or_empty(
    'baseline_results.json',
    '✓ Loaded baseline results',
    '✗ Baseline results not found',
)
boosting = _load_or_empty(
    'boosting_ensemble_results.json',
    '✓ Loaded boosting ensemble results',
    '✗ Boosting results not found',
)
dynamic = _load_or_empty(
    'dynamic_selection_results.json',
    '✓ Loaded dynamic selection results',
    '✗ Dynamic selection results not found',
)

## 2. Compare Methods

In [None]:
# Build one long-format row per (dataset, method) pair.
# Ensemble result sets, paired with the label they get in the 'Method' column.
# Order matters: it fixes the row order within each dataset.
ensemble_sources = (
    ('Boosting Ensemble', boosting),
    ('Dynamic Selection', dynamic),
)

comparison_data = []

# Only datasets present in the baseline results are considered.
for dataset, per_model in baseline.items():
    # Individual models first; a missing 'accuracy' entry is recorded as 0.
    comparison_data.extend(
        {
            'Dataset': dataset,
            'Method': model_name,
            'Accuracy': model_result.get('accuracy', 0),
        }
        for model_name, model_result in per_model.items()
    )

    # Then each ensemble method that has results for this dataset.
    for method_label, source in ensemble_sources:
        if dataset in source:
            comparison_data.append({
                'Dataset': dataset,
                'Method': method_label,
                'Accuracy': source[dataset].get('ensemble_accuracy', 0),
            })

df_comparison = pd.DataFrame(comparison_data)
print(df_comparison.to_string())

## 3. Visualize Comparison

In [None]:
# Two side-by-side grouped bar charts of the same accuracy table; skipped
# entirely when there is nothing to plot.
if not df_comparison.empty:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    by_dataset_ax, by_method_ax = axes

    # (axes, x column, hue column, title) for each panel; the two panels are
    # the same data with the grouping roles of Dataset and Method swapped.
    panel_specs = [
        (by_dataset_ax, 'Dataset', 'Method', 'Method Comparison by Dataset'),
        (by_method_ax, 'Method', 'Dataset', 'Dataset Performance by Method'),
    ]
    for panel_ax, x_col, hue_col, panel_title in panel_specs:
        sns.barplot(data=df_comparison, x=x_col, y='Accuracy', hue=hue_col, ax=panel_ax)
        panel_ax.set_title(panel_title)
        # Fix the y-range to [0, 1] on both panels so they share a scale.
        panel_ax.set_ylim(0, 1)

    # Only the by-dataset panel has its legend repositioned.
    by_dataset_ax.legend(loc='lower left')

    fig.tight_layout()
    fig.savefig('../results/ensemble_comparison.png', dpi=150)
    plt.show()

## 4. Improvement Analysis

In [None]:
# Report, per dataset, how the boosting ensemble compares with the single
# best individual model from the baseline results.
if baseline and boosting:
    print('\nImprovement Over Best Individual Model:')
    print('=' * 70)

    for dataset, model_results in baseline.items():
        # Nothing to report without a boosting entry, and max() below would
        # raise on a dataset that has no individual model results.
        if dataset not in boosting or not model_results:
            continue

        # Single pass for both the winning model's name and its accuracy.
        # A missing 'accuracy' key counts as 0, matching how the comparison
        # table treats it, instead of raising KeyError.
        best_model, best_result = max(
            model_results.items(),
            key=lambda item: item[1].get('accuracy', 0),
        )
        best_individual = best_result.get('accuracy', 0)

        boosting_acc = boosting[dataset].get('ensemble_accuracy', 0)
        improvement = boosting_acc - best_individual
        # Relative change in percent; guarded against division by zero.
        improvement_pct = (improvement / best_individual * 100) if best_individual > 0 else 0

        print(f'\n{dataset}:')
        print(f"  Best Individual: {best_model} ({best_individual:.4f})")
        print(f"  Boosting Ensemble: {boosting_acc:.4f}")
        # The '+' format flag emits the correct sign itself; a hard-coded
        # leading '+' printed "+-0.0123" whenever the ensemble was worse.
        print(f"  Improvement: {improvement:+.4f} ({improvement_pct:+.2f}%)")

## 5. Disagreement Analysis

In [None]:
# Tabulate and plot the average disagreement rate stored with each dataset's
# boosting results; skipped when no boosting results were loaded.
if boosting:
    print('\nModel Disagreement Rates:')
    print('=' * 50)

    # One row per dataset; a missing rate is recorded as 0.
    disagreement_data = [
        {
            'Dataset': dataset_name,
            'Disagreement Rate': dataset_result.get('avg_disagreement_rate', 0),
        }
        for dataset_name, dataset_result in boosting.items()
    ]

    df_disagreement = pd.DataFrame(disagreement_data)
    print(df_disagreement.to_string())

    # Single bar chart with the y-axis fixed to [0, 1].
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(data=df_disagreement, x='Dataset', y='Disagreement Rate', ax=ax)
    ax.set_title('Model Disagreement Rate by Dataset')
    ax.set_ylim(0, 1)
    fig.savefig('../results/disagreement_rate.png', dpi=150)
    plt.show()