In [None]:
# analysis.py - Run this on your PC after downloading results from cluster

import pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pathlib import Path

def load_results(results_dir='results'):
    """Return the object stored in ``<results_dir>/all_results.pkl``.

    Args:
        results_dir: Directory that contains the combined ``all_results.pkl``
            file (a string or path-like object).

    Returns:
        Whatever object was pickled into ``all_results.pkl``.

    Raises:
        FileNotFoundError: If the pickle file is not present in ``results_dir``.
    """
    combined_pickle = Path(results_dir).joinpath('all_results.pkl')

    # NOTE: unpickling can execute arbitrary code, so only point this at
    # result files that come from a source you trust.
    with combined_pickle.open('rb') as handle:
        return pickle.load(handle)


def plot_training_curves(all_results, save_dir='plots', window=20):
    """Plot smoothed per-seed training curves on a 2x2 grid and save a PNG.

    Each entry of ``all_results['training']`` contributes one line to every
    panel: rewards, lifetime, pellet efficiency and ghost-eating efficiency.
    All series are smoothed with a trailing moving average (``min_periods=1``,
    so the first episodes are averaged over however many points exist).

    Args:
        all_results: Mapping with a ``'training'`` entry that iterates over
            per-seed dicts. Each dict is read for ``'seed'``, ``'rewards'``
            and ``'metrics'``; ``'metrics'`` must convert to a DataFrame with
            ``'lifetime'``, ``'pellet_efficiency'`` and
            ``'ghost_eating_efficiency'`` columns.
        save_dir: Output directory; created (including parents) if missing.
        window: Moving-average window length in episodes. Defaults to 20.

    Raises:
        KeyError: If a required key or metrics column is missing.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    # Materialise each seed's data once instead of rebuilding the metrics
    # DataFrame for every panel.
    runs = [
        (run['seed'], pd.Series(run['rewards']), pd.DataFrame(run['metrics']))
        for run in all_results['training']
    ]

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    try:
        # (axes, metrics column or None for raw rewards, title, y-axis label)
        panels = (
            (axes[0, 0], None,
             f'Training Rewards ({window}-episode moving average)', 'Reward'),
            (axes[0, 1], 'lifetime', 'Average Lifetime', 'Steps'),
            (axes[1, 0], 'pellet_efficiency', 'Pellet Efficiency',
             'Pellets/Step'),
            (axes[1, 1], 'ghost_eating_efficiency', 'Ghost-Eating Efficiency',
             'Ghosts/Power Pellet'),
        )

        for ax, column, title, ylabel in panels:
            for seed, rewards, metrics_df in runs:
                series = rewards if column is None else metrics_df[column]
                smoothed = series.rolling(window, min_periods=1).mean()
                ax.plot(smoothed, label=f'Seed {seed}', alpha=0.7)
            ax.set_title(title)
            ax.set_xlabel('Episode')
            ax.set_ylabel(ylabel)
            if runs:
                # Skip the legend when nothing was drawn; matplotlib warns
                # about a legend with no labelled artists.
                ax.legend()
            ax.grid(True, alpha=0.3)

        fig.tight_layout()
        fig.savefig(f'{save_dir}/training_curves.png', dpi=300)
        print(f"Saved training curves to {save_dir}/training_curves.png")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)


def plot_evaluation_results(all_results, save_dir='plots'):
    """Draw per-seed evaluation boxplots, print summary stats and export a CSV.

    Every dict in each result's ``'eval_metrics'`` becomes one row, tagged
    with that result's ``'train_seed'`` in a ``'seed'`` column. The rows are
    plotted as four boxplots grouped by seed (lifetime, pellet efficiency,
    ghost-eating efficiency, backtracking rate), summarised with
    ``describe()`` on stdout, and written to ``evaluation_results.csv``.

    Args:
        all_results: Mapping with an ``'evaluation'`` entry that iterates over
            dicts providing ``'train_seed'`` and ``'eval_metrics'`` (an
            iterable of per-episode metric dicts). The input dicts are not
            modified.
        save_dir: Output directory for the PNG and CSV; created (including
            parents) if missing.

    Raises:
        KeyError: If a required key or metric column is missing.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    # Copy each metric dict while tagging it with its training seed, so the
    # caller's ``all_results`` structure is left untouched.
    eval_rows = [
        {**metric, 'seed': result['train_seed']}
        for result in all_results['evaluation']
        for metric in result['eval_metrics']
    ]
    if not eval_rows:
        # An empty frame has no 'seed' column to group by; report and stop
        # rather than failing inside the plotting call.
        print("No evaluation episodes found; skipping evaluation plots.")
        return

    eval_df = pd.DataFrame(eval_rows)

    # (metric column, panel title, y-axis label), in row-major panel order.
    panels = (
        ('lifetime', 'Lifetime Distribution by Seed', 'Lifetime (steps)'),
        ('pellet_efficiency', 'Pellet Efficiency Distribution by Seed',
         'Pellets/Step'),
        ('ghost_eating_efficiency',
         'Ghost-Eating Efficiency Distribution by Seed',
         'Ghosts/Power Pellet'),
        ('backtracking_rate', 'Backtracking Rate Distribution by Seed',
         'Backtrack %'),
    )

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    try:
        for ax, (column, title, ylabel) in zip(axes.flat, panels):
            eval_df.boxplot(column=column, by='seed', ax=ax)
            # Set labels after boxplot(): the grouped boxplot writes its own
            # title and x-label onto the axes.
            ax.set_title(title)
            ax.set_xlabel('Training Seed')
            ax.set_ylabel(ylabel)
            ax.tick_params(axis='x', labelrotation=0)

        fig.suptitle('')  # Remove the automatic "grouped by" figure title
        fig.tight_layout()
        fig.savefig(f'{save_dir}/evaluation_boxplots.png', dpi=300)
        print(f"Saved evaluation boxplots to {save_dir}/evaluation_boxplots.png")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    # Print summary statistics
    metric_columns = [column for column, _, _ in panels]
    print("\n" + "="*60)
    print("EVALUATION SUMMARY STATISTICS")
    print("="*60)
    print(eval_df.groupby('seed')[metric_columns].describe())

    # Save to CSV
    eval_df.to_csv(f'{save_dir}/evaluation_results.csv', index=False)
    print(f"\nSaved detailed results to {save_dir}/evaluation_results.csv")


def generate_full_report(results_dir='results', plot_dir='plots'):
    """Run the whole analysis: load results, then render every plot.

    Args:
        results_dir: Directory passed to ``load_results``.
        plot_dir: Directory passed to each plotting function as its output
            location.
    """
    banner = "=" * 60

    print("Loading results...")
    loaded = load_results(results_dir)

    # Each stage announces itself, then renders into ``plot_dir``.
    stages = (
        ("Generating training curves...", plot_training_curves),
        ("Generating evaluation plots...", plot_evaluation_results),
    )
    for announcement, render in stages:
        print(announcement)
        render(loaded, plot_dir)

    print(f"\n{banner}")
    print("Analysis complete!")
    print(f"Plots saved to {plot_dir}/")
    print(banner)


# Script entry point: run this locally once the cluster results are downloaded.
if __name__ == "__main__":
    generate_full_report('results', 'plots')