# Free Energy Principle vs Exploration - Results Analysis

This notebook analyzes the results of our experiment testing whether Free Energy Principle agents with active inference show better exploration efficiency, adaptation to uncertainty, and surprise minimization compared to other approaches.

## Hypothesis
**Agents with a stronger Free Energy Principle implementation (higher `epistemic_weight`) will show better exploration efficiency, faster adaptation to uncertainty, and more robust performance in dynamic environments.**

In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from pathlib import Path
import wandb
from scipy.stats import pearsonr

# Plot styling applied through matplotlib / seaborn global state
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Load the experiment results from results.json in the working directory.
# `results` is a dict keyed by agent name. It falls back to an empty dict when
# the file is missing, so the `if results:` guards in the analysis cells skip
# their work instead of failing.
# NOTE(review): the file is presumably produced by train.py (see the message
# printed below) - confirm against the training script.
results_path = Path("results.json")
if results_path.exists():
    # Decode explicitly as UTF-8 rather than relying on the platform's default
    # encoding, which differs between operating systems and locales.
    with open(results_path, 'r', encoding='utf-8') as f:
        results = json.load(f)
    print("✅ Results loaded successfully")
    print(f"Found results for {len(results)} agents")
else:
    print("❌ No results file found. Run train.py first.")
    results = {}

## 1. Training Performance Analysis

In [None]:
if results:
    # 2x3 grid of training curves, one line per agent:
    #   top row    - total reward, success rate, variational free energy
    #   bottom row - epistemic value, pragmatic value, surprise
    # The last four panels are only filled for agents whose metrics contain
    # a 'variational_free_energy' entry.
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))

    # Rolling-mean window, in episodes, used to smooth rewards and success
    window = 100

    for agent_name, agent_results in results.items():
        training_metrics = agent_results['training_metrics']
        if not training_metrics:
            # Nothing to draw for this agent; skipping also avoids the
            # IndexError that training_metrics[0] would raise further down.
            continue

        episodes = [m['episode'] for m in training_metrics]
        rewards = [m['total_reward'] for m in training_metrics]
        success_rates = [m['success'] for m in training_metrics]

        # Smooth curves with a rolling average. Runs shorter than the window
        # are plotted raw, because a full-window mean would be NaN everywhere.
        if len(rewards) >= window:
            rewards_smooth = pd.Series(rewards).rolling(window).mean()
            success_smooth = pd.Series(success_rates).rolling(window).mean()
        else:
            rewards_smooth = rewards
            success_smooth = success_rates

        # Legend label: agents without an epistemic weight are tagged as baseline
        config = agent_results['agent_config']
        if 'epistemic_weight' in config:
            label = f"{agent_name} (ε={config['epistemic_weight']})"
        else:
            label = f"{agent_name} (Baseline)"

        # Reward and success curves exist for every agent
        axes[0, 0].plot(episodes, rewards_smooth, label=label, alpha=0.8)
        axes[0, 1].plot(episodes, success_smooth, label=label, alpha=0.8)

        # FEP-specific metrics, detected from the first logged episode
        if 'variational_free_energy' in training_metrics[0]:
            vfe_values = [m['variational_free_energy'] for m in training_metrics]
            axes[0, 2].plot(episodes, vfe_values, label=label, alpha=0.8)

            # The remaining metrics are optional per episode and default to 0
            optional_panels = (
                (axes[1, 0], 'epistemic_value'),
                (axes[1, 1], 'pragmatic_value'),
                (axes[1, 2], 'surprise'),
            )
            for ax, metric_key in optional_panels:
                values = [m.get(metric_key, 0) for m in training_metrics]
                ax.plot(episodes, values, label=label, alpha=0.8)

    # Configure subplots: (axis, title, y-label); the x-axis is always the episode
    panel_specs = (
        (axes[0, 0], 'Training Rewards', 'Total Reward'),
        (axes[0, 1], 'Success Rate', 'Success Rate'),
        (axes[0, 2], 'Variational Free Energy', 'VFE'),
        (axes[1, 0], 'Epistemic Value (Information Gain)', 'Epistemic Value'),
        (axes[1, 1], 'Pragmatic Value (Goal Achievement)', 'Pragmatic Value'),
        (axes[1, 2], 'Surprise (Negative Log-Likelihood)', 'Surprise'),
    )
    for ax, title, ylabel in panel_specs:
        ax.set_title(title)
        ax.set_xlabel('Episode')
        ax.set_ylabel(ylabel)
        # Only request a legend when the panel holds labelled lines; a panel
        # can be empty, e.g. the FEP panels when no agent logs FEP metrics.
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 2. FEP Test Scenarios Performance Analysis

In [None]:
if results:
    # Long-format table: one row per (agent, scenario) pair
    comparison_data = []
    rate_suffix = '_success_rate'

    for agent_name, agent_results in results.items():
        # Agents whose config has no epistemic weight are recorded with 0.0
        epistemic_weight = agent_results['agent_config'].get('epistemic_weight', 0.0)

        # The standard run is the reference row, so its ratio is fixed at 1.0
        comparison_data.append(dict(
            Agent=agent_name,
            Epistemic_Weight=epistemic_weight,
            Scenario='Standard',
            Success_Rate=agent_results['standard_success_rate'],
            Performance_Ratio=1.0,
        ))

        fep_results = agent_results['fep_results']
        fep_ratios = agent_results['fep_performance_ratios']

        # One row per FEP test scenario; entries that are not success rates are ignored
        for key, success_rate in fep_results.items():
            if not key.endswith(rate_suffix):
                continue

            scenario = key.replace(rate_suffix, '')
            comparison_data.append(dict(
                Agent=agent_name,
                Epistemic_Weight=epistemic_weight,
                Scenario=scenario.replace('_', ' ').title(),
                Success_Rate=success_rate,
                # A scenario with no recorded ratio is entered as 0
                Performance_Ratio=fep_ratios.get(f"{scenario}_performance_ratio", 0),
            ))

    df = pd.DataFrame(comparison_data)

    # Two side-by-side bar charts, grouped by scenario and coloured by agent
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    rate_ax, ratio_ax = axes

    # Left: absolute success rate in every scenario, standard run included
    sns.barplot(data=df, x='Scenario', y='Success_Rate', hue='Agent', ax=rate_ax)
    rate_ax.set_title('Success Rates by FEP Test Scenario')
    rate_ax.set_ylabel('Success Rate')
    rate_ax.tick_params(axis='x', rotation=45)

    # Right: ratio of each FEP scenario to the standard run; the standard
    # rows are dropped because their ratio is 1.0 by construction
    fep_df = df[df['Scenario'].ne('Standard')]
    sns.barplot(data=fep_df, x='Scenario', y='Performance_Ratio', hue='Agent', ax=ratio_ax)
    ratio_ax.set_title('FEP Test Performance Ratios (Test/Standard)')
    ratio_ax.set_ylabel('Performance Ratio')
    # Dashed reference line at parity with the standard run
    ratio_ax.axhline(y=1.0, color='red', linestyle='--', alpha=0.7, label='Baseline')
    ratio_ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    # Wide summary: agents as rows, scenarios as columns, ordered by epistemic weight
    print("\n📊 Performance Summary Table:")
    summary_table = df.pivot(index='Agent', columns='Scenario', values='Success_Rate')
    weight_by_agent = df.groupby('Agent')['Epistemic_Weight'].first()
    summary_table['Epistemic_Weight'] = weight_by_agent
    summary_table = summary_table.sort_values('Epistemic_Weight')
    print(summary_table.round(3))

## 3. Active Inference Metrics Analysis