# Point Cloud Instance Segmentation Results Analysis

This notebook analyzes the performance of different models and configurations for point cloud instance segmentation.

In [None]:
import sys
sys.path.append('..')

import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import yaml
import wandb
from tqdm.notebook import tqdm

from src.evaluation import evaluate_predictions
from src.evaluation.visualizer import PointCloudVisualizer
from src.models import PointNet2InstanceSegmentation, SparseCNNInstanceSegmentation

# Set plotting style.
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8*" and
# newer releases no longer accept the bare "seaborn" name, so prefer the new
# name and only fall back to the old one on installations that lack it.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette('husl')
%matplotlib inline

## 1. Load Results and Configurations

In [None]:
def load_experiment_results(results_dir: str):
    """Load the config and evaluation metrics of every experiment on disk.

    The expected layout is ``<results_dir>/<model_type>/<experiment>/``, where
    each experiment directory holds ``config.yaml`` and
    ``evaluation_results.yaml``.

    Args:
        results_dir: Root directory that contains one sub-directory per
            model type (``pointnet2`` and ``sparsecnn``).

    Returns:
        dict: ``{model_type: {experiment_name: {'config': ..., 'metrics': ...}}}``.
        Both model-type keys are always present; a model type whose directory
        is missing maps to an empty dict. Experiments that lack either YAML
        file are skipped with a printed notice instead of aborting the load.
        An empty YAML file is loaded as an empty dict.
    """
    results_dir = Path(results_dir)

    results = {
        'pointnet2': {},
        'sparsecnn': {}
    }

    for model_type, experiments in results.items():
        model_dir = results_dir / model_type
        if not model_dir.is_dir():
            continue

        # sorted() keeps the experiment order stable; glob order is
        # file-system dependent.
        for exp_dir in sorted(model_dir.glob('*')):
            if not exp_dir.is_dir():
                continue

            config_path = exp_dir / 'config.yaml'
            metrics_path = exp_dir / 'evaluation_results.yaml'

            # An interrupted or still-running experiment may not have written
            # both files yet; skip it rather than failing the whole notebook.
            missing = [path.name for path in (config_path, metrics_path)
                       if not path.is_file()]
            if missing:
                print(f"Skipping {exp_dir}: missing {', '.join(missing)}")
                continue

            # safe_load returns None for an empty document; normalise to {}
            # so downstream membership tests on the result do not fail.
            with open(config_path, 'r') as f:
                config = yaml.safe_load(f) or {}

            with open(metrics_path, 'r') as f:
                metrics = yaml.safe_load(f) or {}

            experiments[exp_dir.name] = {
                'config': config,
                'metrics': metrics
            }

    return results

# Load every experiment found under ../results (a path relative to the
# kernel's working directory); all later sections read this `results` dict.
results = load_experiment_results('../results')

## 2. Overall Performance Comparison

In [None]:
def compare_model_performance(results):
    """Compare headline accuracy metrics across models and experiments.

    Draws one bar chart per metric (grouped by model) and prints per-model
    summary statistics.

    Args:
        results: Mapping ``{model_type: {experiment_name: {'metrics': {...}}}}``.
            Each metrics dict must contain ``'mAP'`` and ``'IoU'``;
            ``'semantic_accuracy'`` is optional.

    Returns:
        pandas.DataFrame: One row per experiment with the columns ``Model``,
        ``Experiment``, ``mAP``, ``IoU`` and ``Semantic Accuracy``. When
        ``results`` contains no experiments the frame is empty (columns only)
        and nothing is plotted.

    Raises:
        KeyError: If an experiment's metrics lack ``'mAP'`` or ``'IoU'``.
    """
    columns = ['Model', 'Experiment', 'mAP', 'IoU', 'Semantic Accuracy']

    data = []
    for model_type, experiments in results.items():
        for exp_name, exp_results in experiments.items():
            metrics = exp_results['metrics']
            data.append({
                'Model': model_type,
                'Experiment': exp_name,
                'mAP': metrics['mAP'],
                'IoU': metrics['IoU'],
                # NOTE: a missing semantic accuracy is recorded as 0, which
                # lowers that model's mean in the chart and statistics below.
                'Semantic Accuracy': metrics.get('semantic_accuracy', 0)
            })

    df = pd.DataFrame(data, columns=columns)

    # Without rows, seaborn and groupby fail on the empty frame, so report
    # the situation and return the (empty) table instead.
    if df.empty:
        print("No experiment results available for performance comparison.")
        return df

    # (column, panel title) for each of the three side-by-side bar charts
    panels = [
        ('mAP', 'Mean Average Precision'),
        ('IoU', 'Mean IoU'),
        ('Semantic Accuracy', 'Semantic Segmentation Accuracy'),
    ]

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    for ax, (metric, title) in zip(axes, panels):
        sns.barplot(data=df, x='Model', y=metric, ax=ax)
        ax.set_title(title)

    plt.tight_layout()
    plt.show()

    # Print detailed statistics
    print("\nDetailed Statistics:")
    print("===================\n")

    for metric, _ in panels:
        print(f"\n{metric}:")
        print(df.groupby('Model')[metric].describe())

    return df

# Accuracy comparison table (one row per experiment). Note that the name
# `performance_df` is assigned again in section 7 of this notebook.
performance_df = compare_model_performance(results)

## 3. Instance Size Analysis

In [None]:
def analyze_instance_size_performance(results):
    """Analyze model performance across small, medium and large instances.

    Only experiments whose metrics contain a non-empty ``'size_metrics'``
    entry contribute. For those, the keys ``'<size>_mAP'`` and
    ``'<size>_IoU'`` are read for each size category.

    Args:
        results: Mapping ``{model_type: {experiment_name: {'metrics': {...}}}}``.

    Returns:
        pandas.DataFrame: One row per (experiment, size) with the columns
        ``Model``, ``Experiment``, ``Size``, ``mAP`` and ``IoU``. The frame
        is empty (columns only) and nothing is plotted when no experiment
        provides size metrics.

    Raises:
        KeyError: If a ``'size_metrics'`` entry lacks one of the expected
            per-size keys.
    """
    size_categories = ['small', 'medium', 'large']
    columns = ['Model', 'Experiment', 'Size', 'mAP', 'IoU']

    size_data = []
    for model_type, experiments in results.items():
        for exp_name, exp_results in experiments.items():
            size_metrics = exp_results['metrics'].get('size_metrics')
            if not size_metrics:
                continue

            for size in size_categories:
                size_data.append({
                    'Model': model_type,
                    'Experiment': exp_name,
                    'Size': size,
                    'mAP': size_metrics[f'{size}_mAP'],
                    'IoU': size_metrics[f'{size}_IoU']
                })

    df = pd.DataFrame(size_data, columns=columns)

    # seaborn and groupby cannot work on a row-less frame; bail out early.
    if df.empty:
        print("No per-size metrics available.")
        return df

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # mAP by instance size
    sns.boxplot(data=df, x='Size', y='mAP', hue='Model', ax=ax1)
    ax1.set_title('mAP by Instance Size')

    # IoU by instance size
    sns.boxplot(data=df, x='Size', y='IoU', hue='Model', ax=ax2)
    ax2.set_title('IoU by Instance Size')

    plt.tight_layout()
    plt.show()

    # Statistical analysis
    print("\nPerformance Statistics by Instance Size:")
    print("=====================================\n")

    for metric in ['mAP', 'IoU']:
        print(f"\n{metric} by Size:")
        print(df.groupby(['Model', 'Size'])[metric].describe())

    return df

# Per-size table: one row per (experiment, size category).
size_performance_df = analyze_instance_size_performance(results)

## 4. Training Convergence Analysis

In [None]:
def _epochs_to_converge(val_loss, window=5, tolerance=0.001):
    """Return the first position at which the smoothed loss stops changing.

    The loss is smoothed with a rolling mean of ``window`` records. Position
    ``i`` counts as converged when the mean of the smoothed values in
    ``[i, i + window)`` differs from the mean in ``[i + window, i + 2 * window)``
    by less than ``tolerance``. If that never happens, the number of records
    is returned. ``val_loss`` must not contain NaN.
    """
    smoothed = val_loss.rolling(window=window).mean()
    for start in range(len(smoothed) - window):
        current = smoothed.iloc[start:start + window].mean()
        following = smoothed.iloc[start + window:start + 2 * window].mean()
        if abs(current - following) < tolerance:
            return start
    return len(smoothed)


def _format_mean_std(values, precision):
    """Format ``mean ± std`` of ``values``, or ``n/a`` when there are none."""
    if not values:
        return "n/a"
    return f"{np.mean(values):.{precision}f} ± {np.std(values):.{precision}f}"


def analyze_training_convergence(entity="your-project",
                                 models=('pointnet2', 'sparsecnn')):
    """Analyze training convergence using W&B logs.

    Fetches the full history of every run under ``<entity>/<model>`` through
    the W&B public API, plots loss and mAP curves against the logged
    ``epoch``, and prints convergence statistics per model.

    Args:
        entity: First component of the path passed to ``wandb.Api().runs``.
            The default is the placeholder value and must be replaced with a
            real W&B entity for the query to return runs.
        models: Names used as the second path component; one W&B project is
            queried per name.

    Returns:
        tuple: ``(runs_data, convergence_stats)`` where

        * ``runs_data`` maps each model to a list of
          ``{'run_name': str, 'history': pandas.DataFrame}``;
        * ``convergence_stats`` maps each model to
          ``{'epochs_to_converge': [...], 'final_performance': [...]}``.
          ``epochs_to_converge`` entries are positions within the run's
          logged ``val_loss`` records; ``final_performance`` entries are the
          mean of the last five logged ``val_mAP`` values.
    """
    api = wandb.Api()
    models = list(models)

    runs_data = {}
    for model in models:
        runs_data[model] = [
            {'run_name': run.name, 'history': pd.DataFrame(run.scan_history())}
            for run in api.runs(f"{entity}/{model}")
        ]

    # Plot training curves
    metrics = ['loss', 'val_loss', 'mAP', 'val_mAP']
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    for ax, metric in zip(axes.flat, metrics):
        for model in models:
            for run_data in runs_data[model]:
                history = run_data['history']
                if metric not in history.columns or 'epoch' not in history.columns:
                    continue

                # History rows only carry the keys logged at that step, so a
                # metric column is typically full of NaN gaps. Keep the rows
                # where both values exist, otherwise the line is not drawn.
                curve = history[['epoch', metric]].dropna()
                if curve.empty:
                    continue

                ax.plot(curve['epoch'], curve[metric],
                        label=f"{model}-{run_data['run_name']}",
                        alpha=0.7)

        ax.set_title(f'{metric} vs Epoch')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(metric)
        # legend() warns when there is nothing to label
        if ax.get_legend_handles_labels()[0]:
            ax.legend()

    plt.tight_layout()
    plt.show()

    # Analyze convergence statistics
    convergence_stats = {}

    for model in models:
        model_stats = {
            'epochs_to_converge': [],
            'final_performance': []
        }

        for run_data in runs_data[model]:
            history = run_data['history']

            # Drop unlogged steps first: a rolling mean over a column with
            # NaN gaps is NaN almost everywhere and would never "converge".
            if 'val_loss' in history.columns:
                val_loss = history['val_loss'].dropna()
                if not val_loss.empty:
                    model_stats['epochs_to_converge'].append(
                        _epochs_to_converge(val_loss))

            # Same reasoning: average the last five *logged* values rather
            # than the last five rows, which may hold no val_mAP at all.
            if 'val_mAP' in history.columns:
                val_map = history['val_mAP'].dropna()
                if not val_map.empty:
                    model_stats['final_performance'].append(
                        val_map.iloc[-5:].mean())

        convergence_stats[model] = model_stats

    print("\nConvergence Statistics:")
    print("=====================\n")

    for model in models:
        print(f"\n{model.upper()}:")
        print(f"Average epochs to converge: "
              f"{_format_mean_std(convergence_stats[model]['epochs_to_converge'], 1)}")
        print(f"Final mAP: "
              f"{_format_mean_std(convergence_stats[model]['final_performance'], 3)}")

    return runs_data, convergence_stats

# This call goes through wandb.Api() and downloads the history of every run,
# so it depends on the W&B service rather than on the local `results` dict.
runs_data, convergence_stats = analyze_training_convergence()

## 5. Error Analysis

In [None]:
def analyze_errors(results):
    """Collect, plot and summarise error rates per model.

    Experiments whose metrics contain an ``'error_analysis'`` entry contribute
    one record per error pattern. Each pattern gets a box plot (grouped by
    model) in a 2x2 grid and a printed ``describe()`` table; patterns without
    any record are skipped in both.

    Args:
        results: Mapping ``{model_type: {experiment_name: {'metrics': {...}}}}``.

    Returns:
        dict: ``{pattern: [{'model', 'experiment', 'rate'}, ...]}`` for the
        patterns ``over_segmentation``, ``under_segmentation``,
        ``boundary_errors`` and ``classification_errors``.
    """
    # error pattern -> key of its rate inside the 'error_analysis' dict
    rate_keys = {
        'over_segmentation': 'over_segmentation_rate',
        'under_segmentation': 'under_segmentation_rate',
        'boundary_errors': 'boundary_error_rate',
        'classification_errors': 'classification_error_rate',
    }
    error_patterns = {pattern: [] for pattern in rate_keys}

    for model_type, experiments in results.items():
        for exp_name, exp_results in experiments.items():
            exp_metrics = exp_results['metrics']
            if 'error_analysis' not in exp_metrics:
                continue

            error_analysis = exp_metrics['error_analysis']
            for pattern, rate_key in rate_keys.items():
                error_patterns[pattern].append({
                    'model': model_type,
                    'experiment': exp_name,
                    'rate': error_analysis[rate_key]
                })

    # One panel per error pattern, filled row by row
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    for position, (error_type, records) in enumerate(error_patterns.items()):
        if records:
            frame = pd.DataFrame(records)
            panel = axes[position // 2, position % 2]

            sns.boxplot(data=frame, x='model', y='rate', ax=panel)
            panel.set_title(f'{error_type.replace("_", " ").title()} Rate')
            panel.set_ylabel('Error Rate')

    plt.tight_layout()
    plt.show()

    # Per-model summary tables
    print("Error Pattern Analysis:")
    print("=====================\n")

    for error_type, records in error_patterns.items():
        if records:
            frame = pd.DataFrame(records)
            print(f"\n{error_type.replace('_', ' ').title()}:")
            print(frame.groupby('model')['rate'].describe())

    return error_patterns

# Dict of per-pattern record lists; the export cell reads `error_patterns`.
error_patterns = analyze_errors(results)

## 6. Instance Boundary Analysis

In [None]:
def analyze_boundary_quality(results):
    """Analyze instance boundary quality and precision.

    Experiments whose metrics contain ``'boundary_metrics'`` contribute one
    row built from the keys ``boundary_precision``, ``boundary_recall``,
    ``boundary_f1`` and ``boundary_accuracy``.

    Args:
        results: Mapping ``{model_type: {experiment_name: {'metrics': {...}}}}``.

    Returns:
        pandas.DataFrame: Columns ``model``, ``experiment``, ``precision``,
        ``recall``, ``f1`` and ``accuracy``. The frame is empty (columns
        only) and nothing is plotted when no experiment provides boundary
        metrics.

    Raises:
        KeyError: If a ``'boundary_metrics'`` entry lacks one of the four keys.
    """
    metrics = ['precision', 'recall', 'f1', 'accuracy']

    boundary_metrics = []
    for model_type, experiments in results.items():
        for exp_name, exp_results in experiments.items():
            if 'boundary_metrics' not in exp_results['metrics']:
                continue

            boundary = exp_results['metrics']['boundary_metrics']
            record = {'model': model_type, 'experiment': exp_name}
            record.update({name: boundary[f'boundary_{name}'] for name in metrics})
            boundary_metrics.append(record)

    df = pd.DataFrame(boundary_metrics, columns=['model', 'experiment'] + metrics)

    # seaborn cannot plot a row-less frame; report and return it instead.
    if df.empty:
        print("No boundary metrics available.")
        return df

    # One box plot per boundary metric
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    for ax, metric in zip(axes.flat, metrics):
        sns.boxplot(data=df, x='model', y=metric, ax=ax)
        ax.set_title(f'Boundary {metric.title()}')

    plt.tight_layout()
    plt.show()

    # Precision vs. recall scatter: one point per experiment, coloured by
    # model (this is not a threshold-swept precision-recall curve).
    plt.figure(figsize=(8, 6))

    for model in df['model'].unique():
        model_data = df[df['model'] == model]
        plt.scatter(model_data['recall'], model_data['precision'],
                    label=model, alpha=0.7)

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Boundary Precision-Recall')
    plt.legend()
    plt.grid(True)
    plt.show()

    return df

# Boundary-quality table: one row per experiment that reported boundary metrics.
boundary_df = analyze_boundary_quality(results)

## 7. Computational Performance Analysis

In [None]:
def analyze_computational_performance(results):
    """Analyze computational efficiency and resource usage.

    Experiments whose metrics contain ``'performance_metrics'`` contribute one
    row built from the keys ``inference_time``, ``memory_usage``, ``flops``
    and ``parameters``. The function draws a box plot per metric and a
    scatter of inference time against memory usage (marker area scaled by
    parameter count), then prints per-model statistics.

    Args:
        results: Mapping ``{model_type: {experiment_name: {'metrics': {...}}}}``.

    Returns:
        pandas.DataFrame: Columns ``model``, ``experiment``,
        ``inference_time``, ``memory_usage``, ``flops`` and ``parameters``.
        The frame is empty (columns only) and nothing is plotted when no
        experiment provides performance metrics.

    Raises:
        KeyError: If a ``'performance_metrics'`` entry lacks one of the four keys.
    """
    metrics = ['inference_time', 'memory_usage', 'flops', 'parameters']

    performance_metrics = []
    for model_type, experiments in results.items():
        for exp_name, exp_results in experiments.items():
            if 'performance_metrics' not in exp_results['metrics']:
                continue

            measured = exp_results['metrics']['performance_metrics']
            record = {'model': model_type, 'experiment': exp_name}
            record.update({name: measured[name] for name in metrics})
            performance_metrics.append(record)

    df = pd.DataFrame(performance_metrics, columns=['model', 'experiment'] + metrics)

    # seaborn and groupby cannot work on a row-less frame; bail out early.
    if df.empty:
        print("No computational performance metrics available.")
        return df

    # One box plot per resource metric
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    for ax, metric in zip(axes.flat, metrics):
        sns.boxplot(data=df, x='model', y=metric, ax=ax)
        ax.set_title(f'{metric.replace("_", " ").title()}')

    plt.tight_layout()
    plt.show()

    # Create performance trade-off plot
    plt.figure(figsize=(10, 6))

    for model in df['model'].unique():
        model_data = df[df['model'] == model]
        plt.scatter(model_data['inference_time'],
                    model_data['memory_usage'],
                    # marker area grows with the parameter count
                    s=model_data['parameters'] / 1e4,
                    label=model, alpha=0.7)

    # NOTE(review): the units in these labels are assumed; nothing in this
    # notebook establishes the units of the stored values.
    plt.xlabel('Inference Time (ms)')
    plt.ylabel('Memory Usage (MB)')
    plt.title('Performance Trade-off Analysis')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Print detailed statistics
    print("Performance Statistics:")
    print("=====================\n")

    for metric in metrics:
        print(f"\n{metric.replace('_', ' ').title()}:")
        print(df.groupby('model')[metric].describe())

    return df

# NOTE(review): this rebinds `performance_df`, replacing the accuracy table
# assigned in section 2. The export cell reads `performance_df`, so after a
# top-to-bottom run it exports this computational-metrics frame. Confirm that
# this is intended, or give the two frames distinct names.
performance_df = analyze_computational_performance(results)

## 8. Ablation Study Analysis

In [None]:
def analyze_ablation_studies(results):
    """Analyze the impact of different model components and configurations.

    For every experiment the ``feature_type``, ``use_attention`` and
    ``multi_scale`` settings are read from the ``'model'`` section of its
    config and paired with its mAP/IoU. The function plots the mean mAP per
    setting and runs a one-way ANOVA (``scipy.stats.f_oneway``) per model and
    component.

    Args:
        results: Mapping
            ``{model_type: {experiment_name: {'config': ..., 'metrics': ...}}}``.

    Returns:
        pandas.DataFrame: Columns ``model``, ``experiment``, ``feature_type``,
        ``attention``, ``multi_scale``, ``mAP`` and ``IoU``. The frame is
        empty (columns only) and nothing is plotted when ``results`` contains
        no experiments.

    Raises:
        KeyError: If an experiment's metrics lack ``'mAP'`` or ``'IoU'``.
    """
    columns = ['model', 'experiment', 'feature_type', 'attention',
               'multi_scale', 'mAP', 'IoU']

    ablation_results = []
    for model_type, experiments in results.items():
        for exp_name, exp_results in experiments.items():
            # A config without a 'model' section (or an empty config) falls
            # back to the defaults below instead of raising.
            model_config = (exp_results['config'] or {}).get('model') or {}
            metrics = exp_results['metrics']

            ablation_results.append({
                'model': model_type,
                'experiment': exp_name,
                'feature_type': model_config.get('feature_type', 'default'),
                'attention': model_config.get('use_attention', False),
                'multi_scale': model_config.get('multi_scale', False),
                'mAP': metrics['mAP'],
                'IoU': metrics['IoU']
            })

    df = pd.DataFrame(ablation_results, columns=columns)

    # groupby/unstack/plot all fail on a row-less frame; bail out early.
    if df.empty:
        print("No experiment results available for ablation analysis.")
        return df

    # (panel label, column) for each ablated component
    components = [
        ('Feature Type', 'feature_type'),
        ('Attention', 'attention'),
        ('Multi-Scale', 'multi_scale'),
    ]

    # Mean mAP per model (rows) and component setting (columns)
    component_analysis = [
        (label, df.groupby(['model', column])['mAP'].mean().unstack())
        for label, column in components
    ]

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    for ax, (label, impact) in zip(axes, component_analysis):
        impact.plot(kind='bar', ax=ax)
        ax.set_title(f'Impact of {label}')
        ax.set_ylabel('mAP')
        ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    # Statistical significance testing
    print("Statistical Significance Analysis:")
    print("===============================\n")

    from scipy import stats

    for _, component in components:
        print(f"\n{component.replace('_', ' ').title()} Impact:")

        for model in df['model'].unique():
            model_rows = df[df['model'] == model]
            values = model_rows[component].unique()
            if len(values) <= 1:
                continue

            groups = [model_rows.loc[model_rows[component] == val, 'mAP']
                      for val in values]

            # ANOVA needs more observations than groups; otherwise the
            # within-group variance has zero degrees of freedom and the
            # statistic is undefined.
            if sum(len(group) for group in groups) <= len(groups):
                print(f"{model}: not enough runs per setting for a one-way ANOVA")
                continue

            f_stat, p_value = stats.f_oneway(*groups)
            print(f"{model}: F-statistic = {f_stat:.3f}, p-value = {p_value:.3f}")

    return df

# Ablation table (one row per experiment); the export cell reads `ablation_df`.
ablation_df = analyze_ablation_studies(results)

## 9. Qualitative Analysis

In [None]:
def visualize_best_worst_cases(results):
    """Render comparison views for each experiment's best and worst case.

    Experiments whose metrics contain ``'case_studies'`` are visualised twice:
    first the ``'best_case'`` entry, then the ``'worst_case'`` entry. Each
    entry supplies ``points``, ``predictions`` and ``ground_truth``, which are
    handed to ``PointCloudVisualizer.create_comparison_visualization`` along
    with a filename of the form ``<case>_<model_type>_<experiment>``.

    Args:
        results: Mapping ``{model_type: {experiment_name: {'metrics': {...}}}}``.
    """
    visualizer = PointCloudVisualizer(config=None)

    # (heading word, key inside 'case_studies'), in display order
    case_kinds = (('Best', 'best_case'), ('Worst', 'worst_case'))

    for model_type, experiments in results.items():
        for exp_name, exp_results in experiments.items():
            exp_metrics = exp_results['metrics']
            if 'case_studies' not in exp_metrics:
                continue

            cases = exp_metrics['case_studies']
            for heading, case_key in case_kinds:
                print(f"\n{heading} Case for {model_type} - {exp_name}:")
                case = cases[case_key]
                visualizer.create_comparison_visualization(
                    points=case['points'],
                    pred_instances=case['predictions'],
                    gt_instances=case['ground_truth'],
                    filename=f'{case_key}_{model_type}_{exp_name}'
                )

# Only experiments whose metrics include 'case_studies' produce output here.
visualize_best_worst_cases(results)

## 10. Model Ensemble Analysis

In [None]:
def analyze_ensemble_performance(results):
    """Analyze performance of model ensembles against their base models.

    Ensemble experiments are read from ``results['ensemble']``. For each one,
    the ``pointnet2`` and ``sparsecnn`` experiments with the same name (if
    any) are collected as the individual baselines. The function draws an
    mAP-vs-IoU scatter and prints summary statistics.

    Args:
        results: Mapping ``{model_type: {experiment_name: {'metrics': {...}}}}``.
            A missing or empty ``'ensemble'`` entry prints a notice and
            returns without plotting.

    Returns:
        None.

    Raises:
        KeyError: If a used experiment's metrics lack ``'mAP'`` or ``'IoU'``.
    """
    ensemble_experiments = results.get('ensemble')
    if not ensemble_experiments:
        print("No ensemble results available.")
        return

    base_models = ['pointnet2', 'sparsecnn']
    ensemble_metrics = []
    individual_metrics = []

    # Collect ensemble and individual model metrics
    for exp_name, exp_results in ensemble_experiments.items():
        metrics = exp_results['metrics']

        ensemble_metrics.append({
            'experiment': exp_name,
            'mAP': metrics['mAP'],
            'IoU': metrics['IoU']
        })

        # Baselines are matched to the ensemble run by experiment name
        for model in base_models:
            if model in results and exp_name in results[model]:
                member_metrics = results[model][exp_name]['metrics']
                individual_metrics.append({
                    'model': model,
                    'experiment': exp_name,
                    'mAP': member_metrics['mAP'],
                    'IoU': member_metrics['IoU']
                })

    # Explicit columns keep the frames usable even when no baseline matched;
    # a frame built from an empty list would have no 'model'/'mAP' columns.
    ensemble_data = pd.DataFrame(ensemble_metrics,
                                 columns=['experiment', 'mAP', 'IoU'])
    individual_data = pd.DataFrame(individual_metrics,
                                   columns=['model', 'experiment', 'mAP', 'IoU'])

    # Compare ensemble vs individual performance
    plt.figure(figsize=(10, 6))

    for model in base_models:
        model_data = individual_data[individual_data['model'] == model]
        if model_data.empty:
            continue
        plt.scatter(model_data['mAP'], model_data['IoU'],
                    label=f'Individual {model}', alpha=0.6)

    plt.scatter(ensemble_data['mAP'], ensemble_data['IoU'],
                label='Ensemble', marker='*', s=200)

    plt.xlabel('mAP')
    plt.ylabel('IoU')
    plt.title('Ensemble vs Individual Model Performance')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Print performance comparison
    print("Performance Comparison:")
    print("=====================\n")

    print("Ensemble Performance:")
    print(ensemble_data.describe())

    print("\nIndividual Model Performance:")
    if individual_data.empty:
        print("No individual model results share an experiment name "
              "with the ensemble runs.")
    else:
        print(individual_data.groupby('model').describe())

# `load_experiment_results` only creates the 'pointnet2' and 'sparsecnn' keys,
# so unless an 'ensemble' entry is added to `results` this prints the
# "No ensemble results available." notice.
analyze_ensemble_performance(results)

## 11. Final Recommendations

Based on the comprehensive analysis above, here are the key findings and recommendations:

### Model Selection
1. **Best Overall Performance**:
   - Model: [Best performing model]
   - Configuration: [Optimal configuration]
   - Key metrics: mAP = [value], IoU = [value]

2. **Trade-offs**:
   - Performance vs. Speed: [Analysis]
   - Memory vs. Accuracy: [Analysis]
   - Complexity vs. Results: [Analysis]

### Key Component Findings
1. **Feature Engineering**:
   - Most effective features: [List]
   - Recommended combinations: [List]

2. **Architecture Choices**:
   - Critical components: [List]
   - Optional enhancements: [List]

### Practical Implementation Recommendations
1. **Training Strategy**:
   - Batch size: [value]
   - Learning rate schedule: [details]
   - Data augmentation: [recommendations]

2. **Deployment Considerations**:
   - Memory requirements: [details]
   - Inference optimization: [strategies]
   - Hardware requirements: [specifications]

### Future Improvements
1. **Research Directions**:
   - [Potential improvement 1]
   - [Potential improvement 2]
   - [Potential improvement 3]

2. **Technical Enhancements**:
   - [Enhancement 1]
   - [Enhancement 2]
   - [Enhancement 3]

## 12. Export Results

In [None]:
def _describe_as_markdown(frame):
    """Return ``frame.describe()`` as a markdown table, or a notice if empty."""
    if frame.empty:
        return '_No data available._'
    return frame.describe().to_markdown()


def export_analysis_results(results_dict):
    """Export analysis results to CSV files and a markdown summary.

    Writes ``performance_metrics.csv``, ``error_analysis.csv``,
    ``ablation_study.csv`` and ``analysis_summary.md`` into
    ``../results/analysis`` (created if needed).

    The data comes from the notebook-level variables ``performance_df``,
    ``error_patterns`` and ``ablation_df``, so the cells that define them must
    have been run first. ``DataFrame.to_markdown`` relies on the optional
    ``tabulate`` package.

    Args:
        results_dict: Accepted for interface compatibility; not read by this
            function.
    """
    output_dir = Path('../results/analysis')
    output_dir.mkdir(exist_ok=True, parents=True)

    # `error_patterns` maps each pattern to a list of record dicts. Passing it
    # straight to DataFrame() yields cells that are dicts, which cannot be
    # described (dicts are unhashable) and serialise poorly. Flatten it to one
    # row per (pattern, experiment) instead.
    error_df = pd.DataFrame(
        [
            {'error_type': error_type, **record}
            for error_type, records in error_patterns.items()
            for record in records
        ],
        columns=['error_type', 'model', 'experiment', 'rate'],
    )

    # Save performance metrics to CSV
    performance_df.to_csv(output_dir / 'performance_metrics.csv')

    # Save error analysis to CSV
    error_df.to_csv(output_dir / 'error_analysis.csv', index=False)

    # Save ablation study results
    ablation_df.to_csv(output_dir / 'ablation_study.csv')

    if error_df.empty:
        error_summary = '_No data available._'
    else:
        # Summarise the rate distribution separately for each error pattern
        error_summary = error_df.groupby('error_type')['rate'].describe().to_markdown()

    # Create summary report
    with open(output_dir / 'analysis_summary.md', 'w', encoding='utf-8') as f:
        f.write('# Instance Segmentation Analysis Summary\n\n')

        f.write('## Performance Metrics\n')
        f.write(_describe_as_markdown(performance_df))

        f.write('\n## Error Analysis\n')
        f.write(error_summary)

        f.write('\n## Ablation Study Results\n')
        f.write(_describe_as_markdown(ablation_df))

    print(f"Analysis results exported to {output_dir}")

# Export all results. The function reads the notebook-level `performance_df`,
# `error_patterns` and `ablation_df`, so sections 5, 7 and 8 must have run.
export_analysis_results(results)