### Results analysis

In [None]:
# ============================================================
# FEATURE SELECTION RESULTS ANALYSIS
# ============================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Plot styling shared by every figure in this notebook
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Make sure the plot and report output folders exist before anything is written
for output_dir in ("results/comparisons/plots", "results/comparisons/reports"):
    Path(output_dir).mkdir(parents=True, exist_ok=True)

In [None]:
# ============================================================
# 1. LOAD ALL RESULTS
# ============================================================
# Reads the per-experiment result tables for one dataset, stacks them into a
# single frame (`all_results`) and saves the combined table next to its inputs.

print("Loading results...")

# Folder holding the per-experiment CSVs and the dataset tag in their file names
tables_dir = Path("results/comparisons/tables")
dataset_tag = "ibtracs.last3years"

# Load each experiment's results
baseline = pd.read_csv(tables_dir / f"baseline_{dataset_tag}.csv")
traditional = pd.read_csv(tables_dir / f"traditional_fs_{dataset_tag}.csv")
embedded = pd.read_csv(tables_dir / f"embedded_fs_{dataset_tag}.csv")
xai = pd.read_csv(tables_dir / f"xai_fs_{dataset_tag}.csv")
optimization = pd.read_csv(tables_dir / f"optimization_fs_{dataset_tag}.csv")

# Combine all results; ignore_index gives every row a unique label, which the
# later idxmax()/.loc lookups rely on
all_results = pd.concat(
    [baseline, traditional, embedded, xai, optimization],
    ignore_index=True,
)

# Save combined results
all_results.to_csv(tables_dir / "all_methods_combined.csv", index=False)

print(f"✓ Loaded {len(all_results)} experiments")
print(f"✓ Methods: {all_results['method'].nunique()}")
print(f"✓ Models: {all_results['model'].nunique()}")
print("\nDataset preview:")
print(all_results.head(10))

In [None]:
# ============================================================
# 2. SUMMARY STATISTICS
# ============================================================
# Per-method aggregate table plus the single best-F1 row of every method.

section_rule = "=" * 60

print("\n" + section_rule)
print("SUMMARY STATISTICS")
print(section_rule)

# Aggregations applied to each metric column, grouped by method
summary_spec = {
    'n_features': ['min', 'max', 'mean'],
    'f1_score': ['min', 'max', 'mean'],
    'auc': ['mean'],
    'train_time': ['mean'],
}
method_summary = (
    all_results
    .groupby('method')
    .agg(summary_spec)
    .round(4)
)

print("\nPerformance by Method:")
print(method_summary)

# Row label of the highest-F1 run within each method
best_row_labels = all_results.groupby('method')['f1_score'].idxmax()
report_columns = ['method', 'model', 'n_features', 'f1_score', 'auc']

# Best run per method, ordered from highest to lowest F1
best_per_method = (
    all_results
    .loc[best_row_labels, report_columns]
    .sort_values('f1_score', ascending=False)
)

print("\n" + section_rule)
print("BEST RESULT PER METHOD")
print(section_rule)
print(best_per_method.to_string(index=False))

# Persist the table for later use
best_per_method.to_csv("results/comparisons/tables/best_results_per_method.csv", index=False)

In [None]:
# ============================================================
# 3. PLOT 1: F1 SCORE VS NUMBER OF FEATURES
# ============================================================
# One line per method: mean F1 (averaged over all rows sharing a feature
# count) against the number of features, with the baseline mean F1 drawn
# as a horizontal reference line.

plt.figure(figsize=(14, 8))

# Baseline reference values; later cells reuse both names
baseline_f1 = baseline['f1_score'].mean()
baseline_features = baseline['n_features'].iloc[0]

# Plot each method
methods = all_results['method'].unique()
colors = sns.color_palette("husl", len(methods))

for i, method in enumerate(methods):
    method_data = all_results[all_results['method'] == method]

    # Average F1 by number of features (groupby sorts by n_features,
    # so the line is drawn left to right)
    avg_by_features = method_data.groupby('n_features').agg({
        'f1_score': 'mean'
    }).reset_index()

    plt.plot(
        avg_by_features['n_features'],
        avg_by_features['f1_score'],
        marker='o',
        linewidth=2,
        markersize=8,
        label=method.replace('_', ' ').title(),
        color=colors[i]
    )

# Add baseline reference line
plt.axhline(
    y=baseline_f1,
    color='red',
    linestyle='--',
    linewidth=2,
    label=f'Baseline (F1={baseline_f1:.3f}, {baseline_features} features)'
)

plt.xlabel('Number of Features', fontsize=14, fontweight='bold')
plt.ylabel('F1 Score', fontsize=14, fontweight='bold')
plt.title('Feature Selection Methods: F1 Score vs Number of Features',
          fontsize=16, fontweight='bold', pad=20)
plt.legend(loc='best', fontsize=11, framealpha=0.9)
plt.grid(True, alpha=0.3)
plt.tight_layout()

plt.savefig('results/comparisons/plots/f1_vs_features.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Saved: f1_vs_features.png")

In [None]:
# ============================================================
# 4. PLOT 2: PERFORMANCE HEATMAP (METHOD × MODEL)
# ============================================================
# Heatmap of the best (maximum) F1 score reached by every
# method/model combination.

plt.figure(figsize=(12, 10))

# Create pivot table: max F1 score for each method-model combination
pivot = all_results.pivot_table(
    values='f1_score',
    index='method',
    columns='model',
    aggfunc='max'
)

# Clean up names for display (e.g. "chi_square" -> "Chi Square")
pivot.index = [x.replace('_', ' ').title() for x in pivot.index]
pivot.columns = [x.replace('_', ' ').title() for x in pivot.columns]

# Create heatmap.
# NOTE(review): the colour scale is pinned to 0.70-0.90, so scores outside
# that range are drawn in the end colours; the annotations still show the
# exact values.
sns.heatmap(
    pivot,
    annot=True,
    fmt='.4f',
    cmap='RdYlGn',
    vmin=0.70,
    vmax=0.90,
    cbar_kws={'label': 'F1 Score'},
    linewidths=0.5
)

plt.title('Best F1 Score by Feature Selection Method and Model',
          fontsize=16, fontweight='bold', pad=20)
plt.xlabel('Model', fontsize=14, fontweight='bold')
plt.ylabel('Feature Selection Method', fontsize=14, fontweight='bold')
plt.tight_layout()

plt.savefig('results/comparisons/plots/method_model_heatmap.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Saved: method_model_heatmap.png")

In [None]:
# ============================================================
# 5. PLOT 3: FEATURE REDUCTION EFFICIENCY
# ============================================================
# Horizontal bars of the best F1 per method, annotated with the feature count
# of that run and its reduction relative to the baseline feature count.

fig, ax = plt.subplots(figsize=(14, 8))

# Best run per method with its feature-reduction percentage, sorted so the
# highest F1 ends up at the top of the chart. `.assign` builds a new frame
# instead of writing a column into a selection of `all_results`, which avoids
# pandas' chained-assignment (SettingWithCopy) pitfall.
best_results = (
    all_results
    .loc[all_results.groupby('method')['f1_score'].idxmax()]
    .assign(
        reduction_pct=lambda frame: (1 - frame['n_features'] / baseline_features) * 100
    )
    .sort_values('f1_score', ascending=True)
)

# Create horizontal bar chart
bars = ax.barh(
    range(len(best_results)),
    best_results['f1_score'],
    color=sns.color_palette("viridis", len(best_results))
)

# Add feature count labels just to the right of each bar
for i, (idx, row) in enumerate(best_results.iterrows()):
    ax.text(
        row['f1_score'] + 0.005,
        i,
        f"{int(row['n_features'])} features\n({row['reduction_pct']:.0f}% reduction)",
        va='center',
        fontsize=9
    )

# Formatting
ax.set_yticks(range(len(best_results)))
ax.set_yticklabels([x.replace('_', ' ').title() for x in best_results['method']])
ax.set_xlabel('F1 Score', fontsize=14, fontweight='bold')
ax.set_title('Feature Selection Efficiency: F1 Score vs Feature Reduction',
             fontsize=16, fontweight='bold', pad=20)
ax.axvline(x=baseline_f1, color='red', linestyle='--', linewidth=2, label='Baseline')
ax.legend()
ax.grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.savefig('results/comparisons/plots/feature_reduction_efficiency.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Saved: feature_reduction_efficiency.png")

In [None]:
# ============================================================
# 6. PLOT 4: AUC COMPARISON
# ============================================================
# Box plot of the AUC distribution of every method, with the baseline mean
# AUC drawn as a horizontal reference line.

plt.figure(figsize=(14, 8))

# One array of AUC values per method. Missing values are dropped because a
# single NaN makes matplotlib's box statistics NaN and blanks that box.
methods_for_plot = all_results['method'].unique()
data_for_box = [
    all_results.loc[all_results['method'] == m, 'auc'].dropna().values
    for m in methods_for_plot
]

# Tick labels are set via plt.xticks below rather than boxplot's `labels=`
# keyword, which newer Matplotlib releases deprecate in favour of
# `tick_labels=`; xticks works on every version.
bp = plt.boxplot(
    data_for_box,
    patch_artist=True,
    showmeans=True
)

# Color the boxes
colors = sns.color_palette("Set2", len(methods_for_plot))
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)

# Add baseline reference
baseline_auc = baseline['auc'].mean()
plt.axhline(y=baseline_auc, color='red', linestyle='--', linewidth=2,
            label=f'Baseline AUC={baseline_auc:.4f}')

plt.xlabel('Feature Selection Method', fontsize=14, fontweight='bold')
plt.ylabel('AUC Score', fontsize=14, fontweight='bold')
plt.title('AUC Score Distribution by Feature Selection Method',
          fontsize=16, fontweight='bold', pad=20)
# boxplot places the boxes at positions 1..N by default
plt.xticks(
    ticks=list(range(1, len(methods_for_plot) + 1)),
    labels=[x.replace('_', ' ').title() for x in methods_for_plot],
    rotation=45,
    ha='right'
)
plt.legend()
plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()

plt.savefig('results/comparisons/plots/auc_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Saved: auc_comparison.png")

In [None]:
# ============================================================
# 7. PLOT 5: TRAINING TIME COMPARISON
# ============================================================
# One line per method: mean training time against the number of features.

plt.figure(figsize=(14, 8))

# Average training time by method and feature count
time_data = all_results.groupby(['method', 'n_features']).agg({
    'train_time': 'mean'
}).reset_index()

methods = time_data['method'].unique()
colors = sns.color_palette("husl", len(methods))

for i, method in enumerate(methods):
    method_time = time_data[time_data['method'] == method]
    plt.plot(
        method_time['n_features'],
        method_time['train_time'],
        marker='o',
        linewidth=2,
        markersize=8,
        label=method.replace('_', ' ').title(),
        color=colors[i]
    )

plt.xlabel('Number of Features', fontsize=14, fontweight='bold')
plt.ylabel('Average Training Time (seconds)', fontsize=14, fontweight='bold')
plt.title('Training Time vs Number of Features', fontsize=16, fontweight='bold', pad=20)
plt.legend(loc='best', fontsize=11)
plt.grid(True, alpha=0.3)
plt.tight_layout()

plt.savefig('results/comparisons/plots/training_time_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Saved: training_time_comparison.png")

In [None]:
# ============================================================
# 8. ANALYSIS: MINIMUM FEATURES FOR BASELINE PERFORMANCE
# ============================================================
# For every method, finds the smallest feature count among the runs whose F1
# is within 2% of the baseline mean F1.

section_rule = "=" * 60

print("\n" + section_rule)
print("MINIMUM FEATURES TO MATCH BASELINE")
print(section_rule)

# A run counts as competitive when it reaches 98% of the baseline F1
baseline_threshold = baseline_f1 * 0.98
competitive = all_results.loc[all_results['f1_score'] >= baseline_threshold]

# Smallest feature count and highest F1 among each method's competitive runs.
# The two columns are aggregated independently, so the F1 shown is not
# necessarily the F1 of the minimum-feature run.
min_features_per_method = (
    competitive
    .groupby('method')
    .agg(n_features=('n_features', 'min'), f1_score=('f1_score', 'max'))
    .round(4)
    .sort_values('n_features')
)

print(f"\nBaseline F1: {baseline_f1:.4f}")
print(f"Threshold (98% of baseline): {baseline_threshold:.4f}\n")
print(min_features_per_method)

# Persist the table (the method name is kept as the index column)
min_features_per_method.to_csv("results/comparisons/tables/min_features_analysis.csv")

In [None]:
# ============================================================
# 9. FEATURE FREQUENCY ANALYSIS
# ============================================================
# Counts how often every feature appears in the `selected_features` list of
# the JSON files saved by each feature-selection method, prints the top 20
# and saves the full ranking.

print("\n" + "="*60)
print("MOST FREQUENTLY SELECTED FEATURES")
print("="*60)

# feature name -> number of JSON files that list it
feature_frequency = {}

methods_dirs = {
    'correlation': 'results/features/ibtracs.last3years/correlation',
    'variance': 'results/features/ibtracs.last3years/variance',
    'chi_square': 'results/features/ibtracs.last3years/chi_square',
    'l1_lasso': 'results/features/ibtracs.last3years/l1_lasso',
    'l2_ridge': 'results/features/ibtracs.last3years/l2_ridge',
    'shap': 'results/features/ibtracs.last3years/shap',
    'lime': 'results/features/ibtracs.last3years/lime',
    'pso': 'results/features/ibtracs.last3years/pso',
    'differential_evolution': 'results/features/ibtracs.last3years/differential_evolution'
}

# Methods whose directory is absent; reported below instead of being
# skipped silently, since they would otherwise be missing from the counts
# without any hint.
missing_methods = []

for method_name, method_dir in methods_dirs.items():
    method_path = Path(method_dir)
    if not method_path.exists():
        missing_methods.append(method_name)
        continue

    # Sorted so the order in which features are first seen (and therefore the
    # order of equally frequent features) does not depend on the filesystem.
    for json_file in sorted(method_path.glob("*.json")):
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Files without the key contribute nothing
        selected = data.get('selected_features', [])
        for feature in selected:
            feature_frequency[feature] = feature_frequency.get(feature, 0) + 1

if missing_methods:
    print(f"\nWarning: no feature directory found for: {', '.join(missing_methods)}")

# Sort by frequency (stable: ties keep first-seen order)
sorted_features = sorted(feature_frequency.items(), key=lambda x: x[1], reverse=True)

print("\nTop 20 most frequently selected features:")
for i, (feature, count) in enumerate(sorted_features[:20], 1):
    # str() keeps the fixed-width format valid for non-string feature names
    print(f"{i:2d}. {str(feature):40s} - selected {count} times")

# Save to CSV
freq_df = pd.DataFrame(sorted_features, columns=['feature', 'frequency'])
freq_df.to_csv("results/comparisons/tables/feature_frequency.csv", index=False)

print("\n✓ Saved feature frequency analysis")

In [None]:
# ============================================================
# 10. PLOT 6: TOP FEATURES FREQUENCY
# ============================================================
# Horizontal bar chart of the 15 most frequently selected features.

plt.figure(figsize=(12, 8))

# freq_df is already sorted by descending frequency
top_features = freq_df.head(15)

bars = plt.barh(
    range(len(top_features)),
    top_features['frequency'],
    color=sns.color_palette("viridis", len(top_features))
)

plt.yticks(range(len(top_features)), top_features['feature'])
plt.xlabel('Selection Frequency', fontsize=14, fontweight='bold')
plt.ylabel('Feature', fontsize=14, fontweight='bold')
plt.title('Top 15 Most Frequently Selected Features Across All Methods',
          fontsize=16, fontweight='bold', pad=20)
plt.grid(True, alpha=0.3, axis='x')

# Add value labels just past the end of each bar
for i, frequency in enumerate(top_features['frequency']):
    plt.text(frequency + 0.3, i, str(int(frequency)),
             va='center', fontsize=10)

plt.tight_layout()
plt.savefig('results/comparisons/plots/top_features_frequency.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Saved: top_features_frequency.png")

In [None]:
# ============================================================
# 11. FINAL SUMMARY REPORT
# ============================================================
# Builds a Markdown report from the tables computed in the cells above
# (all_results, baseline, best_per_method, min_features_per_method,
# sorted_features, baseline_threshold) and writes it to
# results/comparisons/reports/thesis_summary.md.

rule = "=" * 60


def _pretty(name):
    """Turn an identifier such as 'chi_square' into 'Chi Square' for display."""
    return str(name).replace('_', ' ').title()


def _reduction_pct(n_features):
    """Percentage of the baseline feature count removed by a selection."""
    return (1 - n_features / baseline_features) * 100


# The run count is the number of result rows, not the number of training
# samples; the results tables do not carry the sample count.
summary_text = f"""
# FEATURE SELECTION RESULTS SUMMARY
{rule}

## Dataset Information
- Dataset: IBTrACS (Last 3 Years)
- Original Features: {baseline_features}
- Experiment Runs Analyzed: {len(all_results)}
- Class Imbalance: ~89% majority class

## Baseline Performance (All {baseline_features} Features)
- Average F1 Score: {baseline_f1:.4f}
- Average AUC: {baseline['auc'].mean():.4f}

## Best Results by Method Category
"""

# Add best from each category; the loop emits each category heading itself
categories = {
    'Traditional': ['correlation', 'variance', 'chi_square'],
    'Embedded': ['l1_lasso', 'l2_ridge'],
    'XAI-Based': ['shap', 'lime'],
    'Optimization': ['pso', 'differential_evolution']
}

# `category_methods` (not `methods`) so the plotting cells' `methods` array
# is not overwritten when cells are re-run out of order.
for category, category_methods in categories.items():
    cat_results = all_results[all_results['method'].isin(category_methods)]
    if cat_results.empty:
        continue
    best = cat_results.loc[cat_results['f1_score'].idxmax()]
    summary_text += (
        f"\n### {category}:\n"
        f"- Best Method: {_pretty(best['method'])}\n"
        f"- Features: {int(best['n_features'])} ({_reduction_pct(best['n_features']):.1f}% reduction)\n"
        f"- F1 Score: {best['f1_score']:.4f}\n"
        f"- AUC: {best['auc']:.4f}\n"
        f"- Model: {_pretty(best['model'])}\n"
    )

# Highest-F1 feature-selection method. best_per_method is sorted by descending
# F1 and also contains the baseline rows; those are skipped so the baseline is
# not reported as a selection method with 0% reduction. Falls back to the full
# table if it holds nothing but baseline rows.
baseline_method_names = set(baseline['method'].unique())
selection_only = best_per_method[~best_per_method['method'].isin(baseline_method_names)]
top = (selection_only if not selection_only.empty else best_per_method).iloc[0]

# Feature-count range needed to stay above the threshold (minimum to median
# across methods); empty when no run reached the threshold.
if min_features_per_method.empty:
    min_features_line = "- No method reached the threshold"
else:
    needed = min_features_per_method['n_features']
    min_features_line = (
        f"- Need approximately {needed.min():.0f}-{needed.quantile(0.5):.0f} features"
    )

# Most frequently selected feature; sorted_features is empty when no
# selected-feature JSON files were found.
if sorted_features:
    top_feature_block = (
        f"   - Top feature: {sorted_features[0][0]} (selected {sorted_features[0][1]} times)\n"
        "   - Indicates consistent importance across methods"
    )
else:
    top_feature_block = "   - No selected-feature files were found, so no ranking is available"

# Training-time comparison at a reference feature count; the mean is NaN when
# no run used exactly that many features.
reference_n_features = 20
reference_time = all_results.loc[
    all_results['n_features'] == reference_n_features, 'train_time'
].mean()
baseline_time = baseline['train_time'].mean()

if pd.isna(reference_time):
    timing_block = (
        f"   - No runs with {reference_n_features} features were found\n"
        f"   - Baseline: ~{baseline_time:.3f}s"
    )
else:
    # State the conclusion the numbers support rather than assuming a speed-up
    if reference_time < baseline_time:
        timing_verdict = "Feature selection reduces training time"
    else:
        timing_verdict = "Feature selection did not reduce training time in these runs"
    timing_block = (
        f"   - {timing_verdict}\n"
        f"   - Average time with {reference_n_features} features: ~{reference_time:.3f}s\n"
        f"   - Vs. baseline: ~{baseline_time:.3f}s"
    )

summary_text += f"""

## Key Findings:

1. **Most Efficient Method**: {_pretty(top['method'])}
   - Achieved F1={top['f1_score']:.4f} with only {int(top['n_features'])} features
   - Feature reduction: {_reduction_pct(top['n_features']):.1f}%

2. **Minimum Features for Baseline Performance**:
   {min_features_line}
   - To maintain F1 ≥ {baseline_threshold:.4f}

3. **Most Frequently Selected Features**:
{top_feature_block}

4. **Training Efficiency**:
{timing_block}

## Recommendations:

1. **For Maximum Compression**: Use {_pretty(top['method'])}
   - Minimal features while maintaining performance

2. **For Interpretability**: Use SHAP or LIME
   - Provides explanations for feature importance

3. **For Robustness**: Use optimization methods (PSO/DE)
   - Explores feature interactions comprehensively

## Files Generated:
- Plots: results/comparisons/plots/
- Tables: results/comparisons/tables/
- Feature lists: results/features/

{rule}
Generated: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}
"""

# Save summary report; explicit UTF-8 because the text contains "≥", which the
# platform default encoding cannot always represent
with open("results/comparisons/reports/thesis_summary.md", "w", encoding="utf-8") as f:
    f.write(summary_text)

print(summary_text)
print("\n✓ Saved: thesis_summary.md")

In [None]:
# ============================================================
# 12. COMPLETION MESSAGE
# ============================================================
# Prints where the generated plots, tables and report were written.

print("\n" + "="*60)
print("ANALYSIS COMPLETE!")
print("="*60)
print("\nGenerated Files:")
print("  📊 Plots (6): results/comparisons/plots/")
print("  📋 Tables (4): results/comparisons/tables/")
print("  📄 Summary: results/comparisons/reports/thesis_summary.md")
print("\nNext Steps:")
print("  1. Review the plots and tables")
print("  2. Identify key findings for your thesis")
print("  3. Generate LaTeX tables (run next notebook)")
print("="*60)