# Experiment Results Notebook

Document and analyze experimental results from agent training and evaluation.

In [None]:
import sys
sys.path.insert(0, '..')

from src.utils import MetricsCollector, Visualizer
from datetime import datetime

## Experiment Metadata

In [None]:
# Describe the study: what is being compared, when this cell was executed,
# and the simulation settings recorded for the runs.
experiment = dict(
    name='Agent Comparison Study',
    date=datetime.now().isoformat(),  # naive local timestamp of cell execution
    description='Comparing different agent architectures on navigation task',
)

# Run settings are attached last so 'parameters' stays the final key.
experiment['parameters'] = dict(
    episodes=100,
    max_steps=200,
    environment='Simulator',
    world_size=(100, 100),
    num_resources=15,
)

# Echo the headline fields so the rendered notebook records them.
print(
    f"Experiment: {experiment['name']}",
    f"Date: {experiment['date']}",
    f"Description: {experiment['description']}",
    sep="\n",
)

## Sample Results Data

The values below are randomly generated placeholders; in practice, the results would be loaded from saved experiment files.

In [None]:
# Placeholder results: every agent's per-episode numbers come from a seeded RNG.
import random

# A fixed seed makes the generated numbers identical on every run.
random.seed(42)

# NOTE: the draws must stay in this order (rewards, then steps, agent by
# agent) so the seeded random stream is consumed in the same sequence.
experiment_results = {}

experiment_results['autonomous_agent'] = dict(
    rewards=[random.gauss(50, 10) for _episode in range(100)],
    steps=[random.randint(80, 200) for _episode in range(100)],
    success_rate=0.72,
)

# The learning agent's mean reward drifts upward by 0.3 per episode.
experiment_results['learning_agent'] = dict(
    rewards=[random.gauss(45 + episode*0.3, 8) for episode in range(100)],
    steps=[random.randint(70, 180) for _episode in range(100)],
    success_rate=0.68,
)

experiment_results['reasoning_agent'] = dict(
    rewards=[random.gauss(55, 12) for _episode in range(100)],
    steps=[random.randint(90, 190) for _episode in range(100)],
    success_rate=0.75,
)

print(f"Loaded results for {len(experiment_results)} agents")

## Results Analysis

In [None]:
import statistics

# Visualizer instance used to render the text-based charts.
viz = Visualizer()

# Per-agent summary: mean and sample standard deviation of the rewards, the
# recorded success rate, and a sparkline of the first 20 episode rewards.
for agent_name, data in experiment_results.items():
    rewards = data['rewards']

    print(
        f"\n### {agent_name.replace('_', ' ').title()} ###",
        f"  Mean Reward: {statistics.mean(rewards):.2f}",
        f"  Std Dev: {statistics.stdev(rewards):.2f}",
        f"  Success Rate: {data['success_rate']:.0%}",
        f"  Progress: {viz.sparkline(rewards[:20], width=20)}",
        sep="\n",
    )

## Comparative Visualization

In [None]:
# Convert each agent's success fraction into a percentage, keyed by a
# display-friendly name (underscores become spaces, words are title-cased).
success_rates = {
    agent.replace('_', ' ').title(): stats['success_rate'] * 100
    for agent, stats in experiment_results.items()
}

# Heading first, then the chart text on the following line.
print(
    "Success Rate Comparison:",
    viz.bar_chart(success_rates),
    sep="\n",
)

## Learning Curves

In [None]:
# Per-episode rewards recorded for the learning agent.
learning_rewards = experiment_results['learning_agent']['rewards']

# Only the first 50 episodes are charted; the visualizer's own config is
# passed through explicitly.
print(
    "Learning Agent - Reward Over Time:",
    viz.line_chart(learning_rewards[:50], config=viz.config),
    sep="\n",
)

## Mean Reward Comparison

In [None]:
# Average reward per agent. This is a direct comparison of means only; no
# significance test is performed here.
all_means = {
    agent: statistics.mean(stats['rewards'])
    for agent, stats in experiment_results.items()
}

# Pick the agent with the largest mean; on a tie the earliest entry wins.
best_agent = max(all_means, key=lambda agent: all_means[agent])
print(
    f"\nBest performing agent: {best_agent}",
    f"Mean reward: {all_means[best_agent]:.2f}",
    sep="\n",
)

## Conclusions

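Based on this experiment (simulated sample data, not real runs):
1. The reasoning agent achieved the highest success rate (75%, versus 72% for the autonomous agent and 68% for the learning agent)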
2. The learning agent showed improvement over time
3. All agents performed within expected parameters