# Causality vs Correlation Experiment Analysis

This notebook analyzes the results from the causality experiment, comparing how well agents with causal reasoning adapt to interventions versus baseline agents using only correlational learning.

## Hypothesis
Agents with stronger causal reasoning (higher causal_strength) will show better adaptation to environmental interventions and causal transfer learning.

## Key Metrics
- **Intervention Adaptation Rate**: How quickly agents adapt to new interventions
- **Causal Transfer Efficiency**: How well learning from one intervention transfers to another
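- **Causal Graph Accuracy**: How well the learned causal structure matches reality (not computed in this notebook; only the ground-truth causal graph is visualized, in the final section)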

In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

# Set style
# Start from matplotlib's built-in 'default' style, then apply seaborn's
# "husl" palette on top of it.
# NOTE(review): the order presumably matters -- applying the 'default' style
# after set_palette would likely reset the color cycle again; confirm before
# reordering these two calls.
plt.style.use('default')
sns.set_palette("husl")

# Marker that the setup cell has run.
print("📊 Analysis environment initialized")

## Load Experiment Results

In [None]:
# Load results from the experiment.
#
# `results` is a nested dict: agent name -> test name -> metric name -> value.
# Tests whose name starts with "intervention_" carry the intervention metrics
# read by the cells below. If results.json is absent, hard-coded dummy data
# with the same layout is used so the rest of the notebook can be exercised.
results_path = Path("results.json")

if results_path.exists():
    # JSON is UTF-8 by specification; do not depend on the platform's locale
    # encoding (which is not UTF-8 on every system).
    with open(results_path, 'r', encoding='utf-8') as f:
        results = json.load(f)
    # Fail here with a clear message rather than with an AttributeError on
    # `.keys()` / `.items()` further down.
    if not isinstance(results, dict):
        raise TypeError(
            f"{results_path} must contain a JSON object mapping agent names to "
            f"results, got {type(results).__name__}"
        )
    print("✅ Results loaded successfully")
    print(f"   Agents tested: {list(results.keys())}")
else:
    print("❌ Results file not found. Run the training script first: python train.py")
    # Create dummy data for notebook testing
    results = {
        "Causal_Weak": {
            "intervention_strong_wind": {"success_rate": 0.65, "adaptation_speed": 0.15},
            "intervention_low_visibility": {"success_rate": 0.70, "adaptation_speed": 0.12},
            "intervention_unstable_goal": {"success_rate": 0.60, "adaptation_speed": 0.10}
        },
        "Causal_Medium": {
            "intervention_strong_wind": {"success_rate": 0.75, "adaptation_speed": 0.25},
            "intervention_low_visibility": {"success_rate": 0.80, "adaptation_speed": 0.22},
            "intervention_unstable_goal": {"success_rate": 0.72, "adaptation_speed": 0.18}
        },
        "Causal_Strong": {
            "intervention_strong_wind": {"success_rate": 0.85, "adaptation_speed": 0.35},
            "intervention_low_visibility": {"success_rate": 0.88, "adaptation_speed": 0.32},
            "intervention_unstable_goal": {"success_rate": 0.82, "adaptation_speed": 0.28}
        },
        "MDL_Baseline": {
            "intervention_strong_wind": {"success_rate": 0.55, "adaptation_speed": 0.08},
            "intervention_low_visibility": {"success_rate": 0.60, "adaptation_speed": 0.05},
            "intervention_unstable_goal": {"success_rate": 0.52, "adaptation_speed": 0.03}
        }
    }
    print("📝 Using dummy data for demonstration")

## Intervention Adaptation Analysis

In [None]:
# Extract intervention adaptation data.
#
# Flattens `results` (agent -> test -> metrics) into one row per
# (agent, intervention) pair. Only tests whose name starts with
# "intervention_" are kept; the prefix is stripped to give the intervention type.
intervention_prefix = "intervention_"
intervention_columns = [
    "agent", "intervention", "success_rate",
    "adaptation_speed", "avg_reward", "avg_steps",
]

intervention_data = []

for agent_name, agent_results in results.items():
    for test_name, metrics in agent_results.items():
        if test_name.startswith(intervention_prefix):
            # Strip only the leading prefix. str.replace would also remove any
            # later occurrence of "intervention_" inside the test name.
            intervention_type = test_name[len(intervention_prefix):]
            intervention_data.append({
                "agent": agent_name,
                "intervention": intervention_type,
                # NOTE(review): a metric missing from the results is recorded
                # as 0, which is indistinguishable from a measured 0.
                "success_rate": metrics.get("success_rate", 0),
                "adaptation_speed": metrics.get("adaptation_speed", 0),
                "avg_reward": metrics.get("avg_reward", 0),
                "avg_steps": metrics.get("avg_steps", 0)
            })

# Passing the columns explicitly keeps the frame's schema stable even when no
# intervention records were found; otherwise the empty frame has no columns
# and every later `intervention_df['agent']` lookup raises KeyError.
intervention_df = pd.DataFrame(intervention_data, columns=intervention_columns)
print("📊 Intervention adaptation data:")
print(intervention_df.head())

In [None]:
# Create comprehensive intervention adaptation visualization.
#
# 2x2 Plotly dashboard built from `intervention_df`:
#   (1,1) success rate per intervention, one bar series per agent
#   (1,2) adaptation speed per intervention, one bar series per agent
#   (2,1) success rate vs adaptation speed scatter, labelled by intervention
#   (2,2) mean success rate per agent
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Success Rate by Intervention', 'Adaptation Speed by Intervention', 
                   'Success Rate vs Adaptation Speed', 'Agent Performance Comparison'),
    specs=[[{"secondary_y": False}, {"secondary_y": False}],
           [{"secondary_y": False}, {"secondary_y": False}]]
)

# One fixed color per agent, reused in every panel. The legend entries come
# from panel 1 only and are tied to the other panels through `legendgroup`,
# so an agent must look the same everywhere for the legend to be truthful.
# (Without explicit colors Plotly cycles its colorway per trace, giving the
# same agent a different color in each panel.)
colors = px.colors.qualitative.Set1
agents = list(intervention_df['agent'].unique())
agent_colors = {agent: colors[i % len(colors)] for i, agent in enumerate(agents)}

for agent in agents:
    agent_data = intervention_df[intervention_df['agent'] == agent]
    agent_color = agent_colors[agent]

    # 1. Success rate by intervention
    fig.add_trace(
        go.Bar(x=agent_data['intervention'], y=agent_data['success_rate'],
               name=f'{agent} Success Rate', legendgroup=agent,
               marker=dict(color=agent_color)),
        row=1, col=1
    )

    # 2. Adaptation speed by intervention
    fig.add_trace(
        go.Bar(x=agent_data['intervention'], y=agent_data['adaptation_speed'],
               name=f'{agent} Adaptation Speed', legendgroup=agent, showlegend=False,
               marker=dict(color=agent_color)),
        row=1, col=2
    )

    # 3. Success rate vs adaptation speed scatter
    fig.add_trace(
        go.Scatter(x=agent_data['adaptation_speed'], y=agent_data['success_rate'],
                  mode='markers+text', name=f'{agent}',
                  text=agent_data['intervention'], textposition="top center",
                  marker=dict(size=10, color=agent_color),
                  legendgroup=agent, showlegend=False),
        row=2, col=1
    )

# 4. Overall performance comparison
avg_performance = intervention_df.groupby('agent').agg({
    'success_rate': 'mean',
    'adaptation_speed': 'mean'
}).reset_index()

fig.add_trace(
    go.Bar(x=avg_performance['agent'], y=avg_performance['success_rate'],
           name='Average Success Rate', showlegend=False,
           # Color each bar with its agent's color from the other panels.
           marker=dict(color=[agent_colors[agent] for agent in avg_performance['agent']])),
    row=2, col=2
)

# Update layout
fig.update_layout(
    title_text="Intervention Adaptation Analysis",
    height=800,
    showlegend=True
)

fig.update_xaxes(title_text="Intervention Type", row=1, col=1)
fig.update_xaxes(title_text="Intervention Type", row=1, col=2)
fig.update_xaxes(title_text="Adaptation Speed", row=2, col=1)
fig.update_xaxes(title_text="Agent", row=2, col=2)

fig.update_yaxes(title_text="Success Rate", row=1, col=1)
fig.update_yaxes(title_text="Adaptation Speed", row=1, col=2)
fig.update_yaxes(title_text="Success Rate", row=2, col=1)
fig.update_yaxes(title_text="Average Success Rate", row=2, col=2)

fig.show()

## Causal Transfer Learning Analysis

In [None]:
# Collect one record per (agent, transfer scenario) from the optional
# "causal_transfer" section of each agent's results; agents without that
# section contribute nothing.
transfer_data = [
    {
        "agent": agent_name,
        "transfer_type": transfer_name,
        "source_performance": metrics.get("source_performance", 0),
        "target_performance": metrics.get("target_performance", 0),
        "transfer_efficiency": metrics.get("transfer_efficiency", 0),
        "adaptation_speed": metrics.get("adaptation_speed", 0),
    }
    for agent_name, agent_results in results.items()
    if "causal_transfer" in agent_results
    for transfer_name, metrics in agent_results["causal_transfer"].items()
]

if not transfer_data:
    print("📝 No causal transfer data available. Only CausalAgent supports transfer learning.")
else:
    transfer_df = pd.DataFrame(transfer_data)
    print("🔄 Causal transfer data:")
    print(transfer_df.head())

    # Two side-by-side panels: efficiency bars (left), source-vs-target scatter (right)
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    efficiency_ax, performance_ax = axes

    # Left panel: transfer efficiency per agent, split by transfer scenario
    sns.barplot(
        data=transfer_df,
        x='agent',
        y='transfer_efficiency',
        hue='transfer_type',
        ax=efficiency_ax,
    )
    efficiency_ax.set_title('Causal Transfer Efficiency')
    efficiency_ax.set_ylabel('Transfer Efficiency')
    efficiency_ax.tick_params(axis='x', rotation=45)

    # Right panel: one scatter series per agent, in order of first appearance
    for agent, agent_data in transfer_df.groupby('agent', sort=False):
        performance_ax.scatter(
            agent_data['source_performance'],
            agent_data['target_performance'],
            label=agent, s=100, alpha=0.7,
        )

    # Diagonal reference: target performance equal to source performance
    performance_ax.plot([0, 1], [0, 1], 'k--', alpha=0.5, label='Perfect Transfer')
    performance_ax.set(
        xlabel='Source Performance',
        ylabel='Target Performance',
        title='Transfer Learning Performance',
    )
    performance_ax.legend()
    performance_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## Statistical Significance Testing

In [None]:
from scipy import stats

# Compare causal agents vs baseline.
#
# Rows of `intervention_df` are split by agent name ("Causal" vs "MDL") and the
# two groups are compared on success rate and adaptation speed.
# Defines success_stat / success_p / adaptation_stat / adaptation_p, which the
# hypothesis-evaluation and export cells look up later.
#
# NOTE(review): each row is one (agent, intervention) aggregate, so samples are
# few and the causal group pools several agents; treat p-values as indicative.
min_samples_per_group = 2  # a t-test needs a variance estimate in each group

causal_agents = intervention_df[intervention_df['agent'].str.contains('Causal')]
baseline_agents = intervention_df[intervention_df['agent'].str.contains('MDL')]


def _print_comparison(title, column, t_stat, p_value):
    """Print group means, t-statistic, p-value and the 0.05 verdict for one metric."""
    print(f"\n{title} Comparison:")
    print(f"   Causal Agents Mean: {causal_agents[column].mean():.3f}")
    print(f"   Baseline Agents Mean: {baseline_agents[column].mean():.3f}")
    print(f"   t-statistic: {t_stat:.3f}")
    print(f"   p-value: {p_value:.3f}")
    print(f"   Significant: {'✅ Yes' if p_value < 0.05 else '❌ No'}")


# With a single observation in either group the test statistic is NaN and would
# be reported as "not significant", so require at least two rows per group.
if (len(causal_agents) >= min_samples_per_group
        and len(baseline_agents) >= min_samples_per_group):
    # Welch's t-test (equal_var=False): the groups have different sizes and
    # there is no reason to assume they share a variance.
    # Test success rate difference
    success_stat, success_p = stats.ttest_ind(
        causal_agents['success_rate'],
        baseline_agents['success_rate'],
        equal_var=False
    )

    # Test adaptation speed difference
    adaptation_stat, adaptation_p = stats.ttest_ind(
        causal_agents['adaptation_speed'],
        baseline_agents['adaptation_speed'],
        equal_var=False
    )

    print("📊 Statistical Significance Tests:")
    _print_comparison("Success Rate", 'success_rate', success_stat, success_p)
    _print_comparison("Adaptation Speed", 'adaptation_speed', adaptation_stat, adaptation_p)
else:
    # Later cells probe the namespace for these names; drop values left over
    # from a previous run so stale p-values are not counted as evidence.
    for _stale_name in ('success_stat', 'success_p', 'adaptation_stat', 'adaptation_p'):
        globals().pop(_stale_name, None)
    print("⚠️ Insufficient data for statistical testing")

## Hypothesis Evaluation

In [None]:
# Load experiment configuration.
#
# The hypothesis text comes from manifest.json (experiment.hypothesis) when
# available; otherwise a built-in default is used.
default_hypothesis = "Agents with stronger causal reasoning will show better adaptation to interventions"
hypothesis = default_hypothesis

manifest_path = Path("manifest.json")
if manifest_path.exists():
    with open(manifest_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    # A manifest without experiment.hypothesis should not abort the analysis.
    experiment_config = config.get("experiment") if isinstance(config, dict) else None
    if isinstance(experiment_config, dict) and experiment_config.get("hypothesis"):
        hypothesis = experiment_config["hypothesis"]
    else:
        print("⚠️ manifest.json has no experiment.hypothesis entry; using the default hypothesis")

print("🎯 HYPOTHESIS EVALUATION")
print("=" * 50)
print(f"\nHypothesis: {hypothesis}")
print("\nEvidence Analysis:")

# Check if causal strength correlates with performance
causal_strength_map = {
    "Causal_Weak": 0.1,
    "Causal_Medium": 1.0,
    "Causal_Strong": 5.0,
    "MDL_Baseline": 0.0  # No causal reasoning
}

# Agents missing from the map get NaN strength and are ignored by .corr();
# say so instead of silently shrinking the sample.
unmapped_agents = sorted(set(intervention_df['agent']) - set(causal_strength_map))
if unmapped_agents:
    print(f"⚠️ No causal strength defined for {unmapped_agents}; excluded from correlations")

# Add causal strength to dataframe
intervention_df['causal_strength'] = intervention_df['agent'].map(causal_strength_map)

# Calculate correlation between causal strength and performance
strength_performance_corr = intervention_df['causal_strength'].corr(intervention_df['success_rate'])
strength_adaptation_corr = intervention_df['causal_strength'].corr(intervention_df['adaptation_speed'])

print(f"\n1. Causal Strength vs Success Rate Correlation: {strength_performance_corr:.3f}")
print(f"2. Causal Strength vs Adaptation Speed Correlation: {strength_adaptation_corr:.3f}")


def _score_correlation(corr, metric_label):
    """Print the verdict for one correlation and return its evidence points.

    Returns 2 for corr > 0.7, 1 for corr > 0.3, otherwise 0. An undefined (NaN)
    correlation scores 0 and is reported as undefined rather than as "weak".
    """
    if pd.isna(corr):
        print(f"   ⚠️ Correlation with {metric_label} is undefined (too few mapped agents or constant values)")
        return 0
    if corr > 0.7:
        print(f"   ✅ Strong positive correlation with {metric_label}")
        return 2
    if corr > 0.3:
        print(f"   ⚠️ Moderate positive correlation with {metric_label}")
        return 1
    print(f"   ❌ Weak or no correlation with {metric_label}")
    return 0


# Determine if hypothesis is supported (maximum score: 2 + 2 + 1 + 1 = 6)
evidence_strength = 0
evidence_strength += _score_correlation(strength_performance_corr, "success rate")
evidence_strength += _score_correlation(strength_adaptation_corr, "adaptation speed")

# Statistical significance check. The p-values exist only if the significance
# cell ran its tests, so look them up instead of assuming they are defined.
success_p_value = globals().get('success_p')
if success_p_value is not None and success_p_value < 0.05:
    evidence_strength += 1
    print("   ✅ Statistically significant performance difference")

adaptation_p_value = globals().get('adaptation_p')
if adaptation_p_value is not None and adaptation_p_value < 0.05:
    evidence_strength += 1
    print("   ✅ Statistically significant adaptation difference")

# Final conclusion
print(f"\n📊 Evidence Strength Score: {evidence_strength}/6")
print("\n🎯 CONCLUSION:")
if evidence_strength >= 4:
    print("✅ HYPOTHESIS STRONGLY SUPPORTED")
    print("   Causal reasoning provides clear advantages for intervention adaptation")
elif evidence_strength >= 2:
    print("⚠️ HYPOTHESIS MODERATELY SUPPORTED")
    print("   Some evidence for causal reasoning benefits, but not conclusive")
else:
    print("❌ HYPOTHESIS NOT SUPPORTED")
    print("   Insufficient evidence for causal reasoning advantages")

print("\n💡 Future Research Directions:")
print("• Test with more complex causal structures")
print("• Increase sample size for stronger statistical power")
print("• Compare with other baseline methods")
print("• Analyze causal graph learning accuracy")

## Causal Graph Visualization

In [None]:
# Draw the hand-specified ground-truth causal graph used as the reference
# structure for the experiment.
import networkx as nx

OUTCOME = 'agent_performance'

# Each environment factor drives exactly one mediator ...
factor_to_mediator = {
    'wind_strength': 'movement_success',
    'visibility': 'observation_quality',
    'goal_stability': 'goal_position',
}

# Node list: factors first, then mediators, then the outcome
causal_vars = [*factor_to_mediator.keys(), *factor_to_mediator.values(), OUTCOME]

# ... and every mediator feeds the final outcome
causal_edges = list(factor_to_mediator.items()) + [
    (mediator, OUTCOME) for mediator in factor_to_mediator.values()
]

G = nx.DiGraph()
G.add_nodes_from(causal_vars)
G.add_edges_from(causal_edges)

# Fixed seed keeps the spring layout reproducible between runs
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, seed=42)

node_style = dict(node_color='lightblue', node_size=2000, alpha=0.8)
label_style = dict(font_size=10, font_weight='bold')
edge_style = dict(edge_color='gray', arrows=True, arrowsize=20, arrowstyle='->')

nx.draw_networkx_nodes(G, pos, **node_style)
nx.draw_networkx_labels(G, pos, **label_style)
nx.draw_networkx_edges(G, pos, **edge_style)

plt.title(
    'Ground Truth Causal Structure\n(What CausalAgent Should Learn)',
    fontsize=16,
    fontweight='bold',
)
plt.axis('off')
plt.tight_layout()
plt.show()

# Plain-text legend for the figure
for caption_line in (
    "📊 This shows the causal relationships that a perfect causal agent should discover:",
    "• Wind affects movement success",
    "• Visibility affects observation quality",
    "• Goal stability affects goal position",
    "• All of these ultimately affect agent performance",
):
    print(caption_line)

## Export Results Summary

In [None]:
# Create summary report.
#
# Collects whatever earlier cells produced (hypothesis text, correlations,
# evidence score, p-values, per-agent means) into one dict and writes it to
# analysis_summary.json. Values from cells that did not run are written as null.
def _json_safe(value):
    """Return `value` with NaN/inf floats replaced by None, recursing into containers.

    json.dump would otherwise emit the bare tokens NaN / Infinity, which are
    not valid JSON and are rejected by strict parsers.
    """
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if isinstance(value, float) and (value != value or value in (float('inf'), float('-inf'))):
        return None
    return value


_namespace = globals()
_summary_metrics = ['success_rate', 'adaptation_speed']

# Per-agent means, if the intervention frame exists and has the needed columns.
_intervention_frame = _namespace.get('intervention_df')
if _intervention_frame is not None and {'agent', *_summary_metrics} <= set(_intervention_frame.columns):
    performance_summary = _intervention_frame.groupby('agent')[_summary_metrics].mean().to_dict()
else:
    performance_summary = {}

# Record whether the numbers come from a real results file or from the
# built-in dummy data, so an exported summary cannot be mistaken for real results.
_results_file = Path(_namespace.get('results_path', "results.json"))
data_source = str(_results_file) if _results_file.exists() else "dummy demonstration data"

summary = _json_safe({
    "experiment_name": "Causality vs Correlation",
    "hypothesis": _namespace.get('hypothesis', "Causal reasoning improves intervention adaptation"),
    "data_source": data_source,
    "agents_tested": list(results.keys()),
    "key_findings": {
        "causal_strength_performance_correlation": _namespace.get('strength_performance_corr'),
        "causal_strength_adaptation_correlation": _namespace.get('strength_adaptation_corr'),
        "evidence_strength_score": _namespace.get('evidence_strength')
    },
    "performance_summary": performance_summary,
    "statistical_tests": {
        "success_rate_p_value": _namespace.get('success_p'),
        "adaptation_speed_p_value": _namespace.get('adaptation_p')
    }
})

# Save summary
summary_path = Path("analysis_summary.json")
with open(summary_path, 'w', encoding='utf-8') as f:
    # default=str stringifies any remaining non-JSON-native values
    json.dump(summary, f, indent=2, default=str)

print(f"📋 Analysis summary saved to {summary_path}")
if data_source == "dummy demonstration data":
    print("⚠️ Summary was generated from dummy data, not from real experiment results")
print("\n🎉 Analysis Complete!")
print("\nKey outputs:")
print("• Intervention adaptation visualizations")
print("• Statistical significance tests")
print("• Hypothesis evaluation")
print("• Causal graph visualization")
print("• Summary report (JSON)")