# Statistical Analysis Report

This notebook provides a rigorous statistical analysis of the validation results, comparing the MATLAB and Python implementations.

In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path
import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Put the project root at the front of sys.path so imports resolve against it first.
# NOTE(review): Path('..') is resolved against the kernel's working directory, so this
# assumes the notebook is run from a directory one level below the project root — confirm.
project_root = Path('..').resolve()

# Drop any existing entry before inserting: this cell is re-run routinely, and a plain
# sys.path.insert(0, ...) would add one more duplicate entry on every run.
# Slice assignment keeps the same list object that the import system already holds.
_root_str = str(project_root)
sys.path[:] = [_root_str] + [entry for entry in sys.path if entry != _root_str]

# Set style: apply seaborn's "whitegrid" theme globally, so it affects the plots drawn below.
sns.set_theme(style="whitegrid")

ModuleNotFoundError: No module named 'seaborn'

## Methodology

We compare the MATLAB and Python implementations using:
1. **t-tests** for differences in mean radius
2. **Kolmogorov-Smirnov tests** for distribution similarity
3. **Correlation analysis** (Pearson/Spearman) for matched structures

In [None]:
# Load the latest comparison report and summarise the component counts.
#
# Layout read here: <project_root>/comparisons/<run folder>/comparison_report.json.
# "Latest" is the run folder whose path sorts last.
# NOTE(review): this assumes run folder names sort chronologically (e.g. timestamps) — confirm
# against the code that writes them.
output_dir = project_root / 'comparisons'

# Path.iterdir() raises when the directory does not exist, so a missing comparisons
# folder is treated as "no runs" and falls through to the message at the bottom.
if output_dir.is_dir():
    comparison_folders = sorted((d for d in output_dir.iterdir() if d.is_dir()), reverse=True)
else:
    comparison_folders = []

if comparison_folders:
    latest_run = comparison_folders[0]
    report_path = latest_run / 'comparison_report.json'

    if report_path.exists():
        print(f"Loading analysis for: {latest_run.name}")
        # Explicit UTF-8 so the result does not depend on the platform's locale encoding.
        with open(report_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if 'matlab' in data and 'python' in data:
            # Example: compare vertex radii distributions if raw data is available.
            # Note: the report usually contains summaries only. For full statistics
            # the raw .mat/.json files are needed.
            print("Report loaded. Ready for statistical tests.")

            # The Python section is read under a different key for the strand count;
            # every other metric uses the same key in both sections.
            python_keys = {'strand_count': 'network_strands_count'}

            # Build one row per metric; a metric absent from a section counts as 0.
            rows = []
            for metric in ['vertices_count', 'edges_count', 'strand_count']:
                m_val = data['matlab'].get(metric, 0)
                p_val = data['python'].get(python_keys.get(metric, metric), 0)
                rows.append({'Metric': metric, 'MATLAB': m_val, 'Python': p_val, 'Diff': p_val - m_val})

            df = pd.DataFrame(rows)
            display(df)

            # Reshape to long format (one row per metric/implementation pair) so the
            # implementation can be used as the bar hue.
            df_melt = df.melt(id_vars=['Metric'], value_vars=['MATLAB', 'Python'], var_name='Implementation', value_name='Count')

            # Explicit figure/axes instead of the pyplot state machine.
            fig, ax = plt.subplots(figsize=(10, 5))
            sns.barplot(data=df_melt, x='Metric', y='Count', hue='Implementation', ax=ax)
            ax.set_title('Component Counts Comparison')
            plt.show()
        else:
            # Previously this case produced no output at all.
            print("Report has no 'matlab' and/or 'python' section; nothing to compare.")
    else:
        print("No report found in latest run.")
else:
    print("No comparisons found.")