# Hybrid Agent Analysis

This notebook analyzes the performance of the Hybrid Cognitive Agent against a Pure RL baseline.

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os

# Result files written by the experiment runs. Each analysis section below
# guards on the file(s) it needs, so a missing file only skips that section.
BASELINE_PATH = '../experiments/results/baseline_rl.csv'
HYBRID_PATH = '../experiments/results/hybrid_agent.csv'
WM_SWEEP_PATH = '../experiments/results/wm_sweep.csv'
DECEPTIVE_PATH = '../experiments/results/deceptive_agent.csv'


def rolling_mean(values, window):
    """Return the trailing rolling mean of ``values`` over ``window`` rows.

    ``min_periods=1`` lets the first ``window - 1`` rows average over however
    many rows are available instead of producing NaN. Without it the start of
    every curve is missing from the plot, and a run shorter than ``window``
    plots nothing at all.
    """
    return values.rolling(window=window, min_periods=1).mean()


def count_decision_sources(steps):
    """Count the rows of ``steps`` for each (episode, decision_source) pair.

    Returns a DataFrame with columns ``episode``, ``decision_source`` and
    ``count``.
    """
    return steps.groupby(['episode', 'decision_source']).size().reset_index(name='count')


# Sections 1 and 2 compare the two agents, so they need both result files.
if not os.path.exists(BASELINE_PATH) or not os.path.exists(HYBRID_PATH):
    print("Data files not found. Please run the experiments first.")
else:
    # Load Data
    baseline_df = pd.read_csv(BASELINE_PATH)
    hybrid_df = pd.read_csv(HYBRID_PATH)

    baseline_df['Agent'] = 'Pure RL'
    hybrid_df['Agent'] = 'Hybrid'

    # ignore_index avoids duplicate row labels in the combined frame.
    df = pd.concat([baseline_df, hybrid_df], ignore_index=True)

    # 1. Learning Curve (Total Reward per Episode)
    episode_rewards = df.groupby(['Agent', 'episode'])['reward'].sum().reset_index()

    # Smooth each agent's curve separately; groupby() above already returned
    # the rows sorted by episode within each agent.
    episode_rewards['reward_smooth'] = episode_rewards.groupby('Agent')['reward'].transform(
        lambda rewards: rolling_mean(rewards, 10)
    )

    fig1 = px.line(episode_rewards, x='episode', y='reward_smooth', color='Agent', title='Learning Performance (Smoothed)')
    fig1.show()

    # 2. Decision Source Analysis (Hybrid Only)
    if 'decision_source' in hybrid_df.columns:
        source_counts = count_decision_sources(hybrid_df)
        fig2 = px.bar(source_counts, x='episode', y='count', color='decision_source', title='Decision Source over Time (Hybrid Agent)')
        fig2.show()

# 3. Bounded Rationality (WM Capacity Sweep)
# Reads only its own CSV, so it runs even when the baseline/hybrid files are absent.
if os.path.exists(WM_SWEEP_PATH):
    wm_df = pd.read_csv(WM_SWEEP_PATH)

    # Mean reward per capacity, averaged over all rows for that capacity.
    wm_summary = wm_df.groupby(['capacity'])['total_reward'].mean().reset_index()
    fig3 = px.bar(wm_summary, x='capacity', y='total_reward', title='Effect of WM Capacity on Average Reward')
    fig3.show()

    # Learning curves per capacity.
    # NOTE(review): the smoothing follows the CSV's row order; this assumes rows
    # are already in episode order within each capacity -- confirm.
    wm_df['reward_smooth'] = wm_df.groupby('capacity')['total_reward'].transform(
        lambda rewards: rolling_mean(rewards, 5)
    )
    fig4 = px.line(wm_df, x='episode', y='reward_smooth', color='capacity', title='Learning Curves by WM Capacity')
    fig4.show()

# 4. Deceptive Maze Analysis (Confusion/Conflict)
# Also independent of the baseline/hybrid files.
if os.path.exists(DECEPTIVE_PATH):
    dec_df = pd.read_csv(DECEPTIVE_PATH)

    # Plot Total Reward to show the "Dip"
    dec_rewards = dec_df.groupby('episode')['reward'].sum().reset_index()
    dec_rewards['reward_smooth'] = rolling_mean(dec_rewards['reward'], 10)

    fig5 = px.line(dec_rewards, x='episode', y='reward_smooth', title='Deceptive Maze Performance (Seeking J-Curve)')
    fig5.show()

    # Plot Decision Source to see shift from Rule -> RL
    if 'decision_source' in dec_df.columns:
        source_dist = count_decision_sources(dec_df)
        fig6 = px.bar(source_dist, x='episode', y='count', color='decision_source', title='Rule vs RL Dominance in Deceptive Maze')
        fig6.show()
