In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from tensorboard.backend.event_processing import event_accumulator

# Plotting style: notebook-wide matplotlib defaults shared by every figure below
# (individual cells may still override them, e.g. with an explicit figsize).
# NOTE: the 'seaborn-v0_8-*' style names are available from matplotlib 3.6 onward.
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams['figure.figsize'] = (12, 6)  # default figure size (width, height)
plt.rcParams['font.size'] = 10  # default font size for text in figures

## 1. Load Training Results

In [None]:
def load_results(agent_name, log_dir='../logs'):
    """Read the ``results.json`` produced for one agent.

    Args:
        agent_name: Name of the agent's sub-directory inside ``log_dir``.
        log_dir: Root directory that holds one folder per agent.

    Returns:
        The decoded JSON content, or ``None`` (after printing a message)
        when the file does not exist.
    """
    json_file = Path(log_dir).joinpath(agent_name, 'results.json')

    # Guard clause: report the missing file and bail out early.
    if not json_file.exists():
        print(f"Results not found: {json_file}")
        return None

    with json_file.open('r') as handle:
        return json.load(handle)

def load_tensorboard_data(agent_name, log_dir='../logs', tag='Episode/Score'):
    """Load one scalar series (episode scores by default) from TensorBoard logs.

    Args:
        agent_name: Name of the agent's sub-directory inside ``log_dir``.
        log_dir: Root directory that holds one folder per agent.
        tag: Scalar tag to read from the event files.

    Returns:
        ``(steps, values)`` as two equal-length lists, or ``(None, None)``
        when the directory, the event files, or the tag cannot be found
        (a message is printed in each of those cases).
    """
    log_path = Path(log_dir) / agent_name

    if not log_path.exists():
        print(f"Log directory not found: {log_path}")
        return None, None

    # Cheap pre-check so a run without event files is reported explicitly
    # instead of surfacing later as a missing tag.
    if not any(log_path.glob('events.out.tfevents.*')):
        print(f"No TensorBoard event files found in {log_path}")
        return None, None

    # A size guidance of 0 keeps every scalar event. With the default
    # guidance the accumulator samples long series down to a fixed number of
    # points, which would distort the learning curves and the "last 100
    # episodes" statistics of long training runs.
    ea = event_accumulator.EventAccumulator(
        str(log_path),
        size_guidance={event_accumulator.SCALARS: 0},
    )
    ea.Reload()

    # Only the tag lookup is expected to raise KeyError, so keep the try
    # block limited to it.
    try:
        events = ea.Scalars(tag)
    except KeyError:
        print(f"Tag '{tag}' not found in TensorBoard logs")
        return None, None

    steps = [e.step for e in events]
    values = [e.value for e in events]
    return steps, values

# Load results for all agents
agents = ['dqn', 'ppo', 'sac']
results = {agent: load_results(agent) for agent in agents}


def _format_stat(value):
    """Format a numeric statistic with two decimals; anything else becomes 'N/A'.

    Applying the ``.2f`` format spec directly to a missing value's 'N/A'
    placeholder raises ValueError, so the numeric check happens first.
    """
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return f"{value:.2f}"
    return 'N/A'


# Display summary (agents whose results file was not found are skipped)
for agent, result in results.items():
    if result:
        print(f"\n{agent.upper()} Results:")
        print(f"  Total Episodes: {result.get('total_episodes', 'N/A')}")
        print(f"  Mean Score: {_format_stat(result.get('mean_score'))}")
        print(f"  Max Score: {_format_stat(result.get('max_score'))}")
        print(f"  Converged at Episode: {result.get('converged_episode', 'N/A')}")

## 2. Learning Curves Comparison

In [None]:
def plot_learning_curves(agents, log_dir='../logs', window=50):
    """Plot raw and smoothed learning curves for all agents.

    The left panel shows the raw episode scores (faint) with a moving average
    on top; the right panel shows only the moving averages. Agents without
    TensorBoard data are skipped. The figure is saved as
    ``learning_curves_comparison.png`` inside ``log_dir`` and then displayed.

    Args:
        agents: Iterable of agent names (sub-directories of ``log_dir``).
        log_dir: Root directory holding the per-agent logs and the saved figure.
        window: Number of episodes used for the moving average.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    colors = {'dqn': 'blue', 'ppo': 'green', 'sac': 'red'}

    for agent in agents:
        # Parse the event files once per agent and reuse the data for both panels.
        episodes, scores = load_tensorboard_data(agent, log_dir)
        if episodes is None or scores is None:
            continue

        # Raw scores. Agents without a predefined colour get the next colour
        # of matplotlib's cycle (color=None); reuse whatever was picked so the
        # raw and smoothed curves of one agent always match.
        (raw_line,) = ax1.plot(episodes, scores, alpha=0.3, color=colors.get(agent))
        color = raw_line.get_color()

        if len(scores) >= window:
            moving_avg = pd.Series(scores).rolling(window=window).mean()
            ax1.plot(episodes, moving_avg, label=agent.upper(),
                     color=color, linewidth=2)
            ax2.plot(episodes, moving_avg, label=agent.upper(),
                     color=color, linewidth=2)
        else:
            # Too few episodes for a full window: label the raw curve instead.
            # The smoothed panel intentionally shows nothing for this agent.
            ax1.plot(episodes, scores, label=agent.upper(),
                     color=color, linewidth=2)

    ax1.set_xlabel('Episode')
    ax1.set_ylabel('Episode Reward')
    ax1.set_title('Learning Curves (Episode Rewards)')
    ax1.grid(True, alpha=0.3)
    # Draw the target line BEFORE building the legend, otherwise its label
    # is not included in the legend.
    ax1.axhline(y=700, color='black', linestyle='--', label='Target Score (700)')
    ax1.legend()

    ax2.set_xlabel('Episode')
    ax2.set_ylabel(f'Moving Average ({window} episodes)')
    ax2.set_title('Smoothed Learning Curves')
    ax2.grid(True, alpha=0.3)
    ax2.axhline(y=700, color='black', linestyle='--', label='Target Score (700)')
    ax2.legend()

    plt.tight_layout()
    # Save next to the logs that were read; with the default log_dir this is
    # the same '../logs/learning_curves_comparison.png' location as before.
    output_path = Path(log_dir) / 'learning_curves_comparison.png'
    output_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.show()

plot_learning_curves(agents)

## 3. Sample Efficiency Comparison

In [None]:
def plot_sample_efficiency(agents, results, log_dir='../logs'):
    """Plot episodes-to-convergence and final average score per agent.

    Agents without loaded results are skipped. The figure is saved as
    ``sample_efficiency_comparison.png`` inside ``log_dir`` and then displayed.

    Args:
        agents: Iterable of agent names.
        results: Mapping of agent name to its results dict (or ``None``).
        log_dir: Directory in which the figure is saved.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Colours are looked up per agent so they stay correct when an agent is
    # skipped (positional colour lists would shift onto the wrong agent).
    convergence_colors = {'dqn': 'blue', 'ppo': 'green', 'sac': 'red'}
    score_colors = {'dqn': 'lightblue', 'ppo': 'lightgreen', 'sac': 'lightcoral'}

    agent_names = []
    converged_episodes = []
    final_scores = []
    bar_colors = []
    bar_colors_light = []

    for agent in agents:
        stats = results.get(agent)
        if not stats:
            continue

        # Agents that never converged are charged their full episode count.
        conv_ep = stats.get('converged_episode')
        if not conv_ep:
            conv_ep = stats.get('total_episodes', 0)

        # Prefer the recent mean; fall back to the overall mean only when it
        # is absent, so a missing 'mean_score' cannot raise KeyError while
        # 'recent_mean' is available. Missing values become zero-height bars.
        final_score = stats.get('recent_mean')
        if final_score is None:
            final_score = stats.get('mean_score', 0)

        agent_names.append(agent.upper())
        converged_episodes.append(conv_ep)
        final_scores.append(final_score)
        bar_colors.append(convergence_colors.get(agent, 'gray'))
        bar_colors_light.append(score_colors.get(agent, 'lightgray'))

    # Create grouped bar plot: two bars per agent, centred on the tick.
    x = np.arange(len(agent_names))
    width = 0.35

    ax.bar(x - width/2, converged_episodes, width, label='Episodes to Convergence',
           color=bar_colors, alpha=0.7)
    ax.bar(x + width/2, final_scores, width, label='Final Average Score',
           color=bar_colors_light, alpha=0.7)

    ax.set_xlabel('Agent')
    ax.set_ylabel('Value')
    ax.set_title('Sample Efficiency and Final Performance')
    ax.set_xticks(x)
    ax.set_xticklabels(agent_names)
    ax.legend()
    ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    # With the default log_dir this is the same
    # '../logs/sample_efficiency_comparison.png' location as before.
    output_path = Path(log_dir) / 'sample_efficiency_comparison.png'
    output_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.show()

plot_sample_efficiency(agents, results)

## 4. Statistical Summary

In [None]:
def create_summary_table(agents, results):
    """Build a DataFrame with one row of summary statistics per agent.

    Agents whose entry in ``results`` is empty or ``None`` are left out.
    Score statistics are rendered as strings with two decimals (a missing
    statistic defaults to 0); episode counts are passed through, with 'N/A'
    standing in for missing keys.
    """
    # (column title, results key) pairs for the two-decimal score columns,
    # listed in the order they appear in the table.
    score_columns = (
        ('Mean Score', 'mean_score'),
        ('Std Score', 'std_score'),
        ('Min Score', 'min_score'),
        ('Max Score', 'max_score'),
        ('Recent Mean', 'recent_mean'),
        ('Recent Std', 'recent_std'),
    )

    rows = []
    for agent in agents:
        stats = results[agent]
        if not stats:
            continue

        row = {
            'Agent': agent.upper(),
            'Total Episodes': stats.get('total_episodes', 'N/A'),
        }
        for title, key in score_columns:
            row[title] = f"{stats.get(key, 0):.2f}"
        row['Converged Episode'] = stats.get('converged_episode', 'N/A')
        rows.append(row)

    return pd.DataFrame(rows)

# Build the comparison table and print it between banner lines
summary_df = create_summary_table(agents, results)
print("\n" + "="*100)
print("AGENT COMPARISON SUMMARY")
print("="*100)
print(summary_df.to_string(index=False))
print("="*100 + "\n")

# Save to CSV. The path is kept in one variable so the confirmation message
# always reports the location that was actually written.
summary_path = Path('../logs') / 'comparison_summary.csv'
summary_path.parent.mkdir(parents=True, exist_ok=True)
summary_df.to_csv(summary_path, index=False)
print(f"Summary saved to: {summary_path}")

## 5. Box Plot Comparison

In [None]:
def plot_score_distributions(agents, log_dir='../logs'):
    """Draw a box plot of each agent's scores over its last 100 episodes.

    Agents without TensorBoard score data are skipped; if no agent has data,
    a message is printed and no figure is produced. Otherwise the figure is
    saved as ``score_distributions.png`` inside ``log_dir`` and displayed.

    Args:
        agents: Iterable of agent names (sub-directories of ``log_dir``).
        log_dir: Root directory holding the per-agent logs and the saved figure.
    """
    # Colours are looked up per agent so they stay correct when an agent is
    # skipped (a positional colour list would shift onto the wrong agent).
    box_colors = {'dqn': 'lightblue', 'ppo': 'lightgreen', 'sac': 'lightcoral'}

    data = []
    labels = []
    colors = []

    for agent in agents:
        _, scores = load_tensorboard_data(agent, log_dir)
        if scores:
            # Use last 100 episodes for distribution
            data.append(scores[-100:])
            labels.append(agent.upper())
            colors.append(box_colors.get(agent, 'lightgray'))

    if not data:
        # Nothing to draw: report it instead of handing boxplot an empty dataset.
        print("No score data available; skipping score distribution plot")
        return

    fig, ax = plt.subplots(figsize=(10, 6))

    bp = ax.boxplot(data, patch_artist=True)
    # Set the tick labels explicitly rather than through boxplot's `labels`
    # keyword, which newer matplotlib releases deprecate. Boxes are placed at
    # positions 1..N by default.
    ax.set_xticks(list(range(1, len(labels) + 1)))
    ax.set_xticklabels(labels)

    # Color boxes
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)

    ax.set_ylabel('Episode Reward')
    ax.set_title('Score Distribution (Last 100 Episodes)')
    ax.axhline(y=700, color='red', linestyle='--', label='Target Score (700)', alpha=0.5)
    ax.grid(True, alpha=0.3, axis='y')
    ax.legend()

    plt.tight_layout()
    # With the default log_dir this is the same
    # '../logs/score_distributions.png' location as before.
    output_path = Path(log_dir) / 'score_distributions.png'
    output_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.show()

plot_score_distributions(agents)

## 6. Conclusion

### Key Findings:

1. **Sample Efficiency**: Compare which agent reached the target score (700) fastest
2. **Final Performance**: Compare the final average scores across agents
3. **Stability**: Compare variance in episode rewards (lower is better)
4. **Action Space**: DQN uses discrete actions while PPO/SAC use continuous actions

### Expected Results:
- **DQN**: Typically achieves scores of 700-850 but may be less sample-efficient
- **PPO**: Good balance of sample efficiency and final performance
- **SAC**: Often the most sample-efficient agent with continuous actions; typically achieves scores of 850-950

### Recommendations:
- For deployment: Choose the agent with best final performance and stability
- For sample efficiency: Choose the agent that converges fastest
- For continuous control: PPO and SAC are more suitable than DQN