# StarCoder Multi-Agent System - Performance Benchmarking

This notebook provides comprehensive performance benchmarking for the StarCoder Multi-Agent System. You'll learn how to:

1. Design effective benchmark tests
2. Measure system performance metrics
3. Identify bottlenecks and optimization opportunities
4. Compare different configurations
5. Generate performance reports

## Prerequisites

- Generator (`localhost:9001`) and reviewer (`localhost:9002`) services running and reporting healthy
- Sufficient system resources
- Benchmarking libraries: `time`, `psutil`, `memory_profiler`


In [None]:
# Import benchmarking libraries
import asyncio
import sys
import json
import time
import psutil
import statistics
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from datetime import datetime
from typing import List, Dict, Any, Tuple
from concurrent.futures import ThreadPoolExecutor
import httpx

# Add project root to Python path
sys.path.insert(0, str(Path.cwd().parent))

from orchestrator import process_simple_task, MultiAgentOrchestrator
from communication.message_schema import OrchestratorRequest

# Apply the plotting theme used by the charts in this notebook
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Report the environment the benchmarks will run in (one line per fact)
print(
    "✅ Benchmarking libraries imported successfully!",
    f"📁 Working directory: {Path.cwd()}",
    f"🐍 Python version: {sys.version}",
    f"💻 CPU cores: {psutil.cpu_count()}",
    f"🧠 Memory: {psutil.virtual_memory().total / (1024**3):.1f} GB",
    sep="\n",
)


## 1. System Resource Monitoring

Let's first establish baseline system resource metrics (CPU, memory, disk) and verify that the services are healthy before running any benchmark.


In [None]:
# Monitor system resources
def get_system_metrics(cpu_interval: float = 1.0, disk_path: str = '/') -> Dict[str, Any]:
    """Get current system resource metrics.

    Args:
        cpu_interval: Value passed to ``psutil.cpu_percent(interval=...)``,
            i.e. the sampling window in seconds. The default of 1.0 keeps
            the previously hard-coded behaviour.
        disk_path: Path handed to ``psutil.disk_usage``. Defaults to ``'/'``
            as before; pass another mount point to measure a different
            volume.

    Returns:
        A dict with these keys:

        * ``timestamp`` - ``datetime.now().isoformat()`` at collection time.
        * ``cpu_percent`` - CPU utilisation over ``cpu_interval``.
        * ``memory_total_gb`` / ``memory_used_gb`` - bytes divided by 1024**3.
        * ``memory_percent`` - as reported by ``psutil.virtual_memory()``.
        * ``disk_total_gb`` / ``disk_used_gb`` - bytes divided by 1024**3.
        * ``disk_percent`` - ``used / total * 100``, or 0.0 when the
          filesystem reports a total size of zero.
    """
    bytes_per_gb = 1024 ** 3

    cpu_percent = psutil.cpu_percent(interval=cpu_interval)
    memory = psutil.virtual_memory()
    disk = psutil.disk_usage(disk_path)

    # A configurable path may point at a filesystem that reports size 0
    # (presumably pseudo-filesystems); avoid ZeroDivisionError there.
    disk_percent = (disk.used / disk.total) * 100 if disk.total else 0.0

    return {
        'timestamp': datetime.now().isoformat(),
        'cpu_percent': cpu_percent,
        'memory_total_gb': memory.total / bytes_per_gb,
        'memory_used_gb': memory.used / bytes_per_gb,
        'memory_percent': memory.percent,
        'disk_total_gb': disk.total / bytes_per_gb,
        'disk_used_gb': disk.used / bytes_per_gb,
        'disk_percent': disk_percent,
    }

# Check service health
async def check_service_health() -> Dict[str, Any]:
    """Check health of all services.

    Issues a GET request (5 second timeout) to the ``/health`` endpoint of
    the generator and reviewer services, one after the other, over a single
    shared ``httpx.AsyncClient``.

    Returns:
        A dict keyed by service name. For a service that answered with a
        successful HTTP status and a JSON body containing ``status``, the
        value is ``{'status': <reported status>, 'uptime': <reported uptime
        or 0>}``. For any failure (connection error, timeout, non-success
        HTTP status, invalid JSON, missing ``status`` key) the value is
        ``{'status': 'unhealthy', 'error': <message>}``.

    This function does not raise for per-service failures; they are
    recorded in the returned dict instead.
    """
    services = {
        'generator': 'http://localhost:9001/health',
        'reviewer': 'http://localhost:9002/health'
    }

    health_status = {}

    async with httpx.AsyncClient() as client:
        for service, url in services.items():
            try:
                response = await client.get(url, timeout=5.0)
                # A non-success HTTP status must count as unhealthy even if
                # the body is valid JSON; otherwise the entry would lack an
                # 'error' key and callers would treat the service as ready.
                response.raise_for_status()
                # Decode the body once instead of once per field.
                payload = response.json()
                health_status[service] = {
                    'status': payload['status'],
                    'uptime': payload.get('uptime', 0)
                }
            except Exception as e:
                # Deliberately broad: any failure while probing a service is
                # reported as "unhealthy" rather than aborting the notebook.
                health_status[service] = {
                    'status': 'unhealthy',
                    # Some exceptions stringify to '' (seen with timeouts);
                    # fall back to the class name so the report is not blank.
                    'error': str(e) or type(e).__name__
                }

    return health_status

# Get baseline metrics
print("🔍 Collecting baseline system metrics...")
baseline_metrics = get_system_metrics()
health_status = await check_service_health()

print("📊 Baseline System Metrics:")
print("=" * 50)
print(f"• CPU Usage: {baseline_metrics['cpu_percent']:.1f}%")
print(f"• Memory Usage: {baseline_metrics['memory_percent']:.1f}% ({baseline_metrics['memory_used_gb']:.1f}GB / {baseline_metrics['memory_total_gb']:.1f}GB)")
print(f"• Disk Usage: {baseline_metrics['disk_percent']:.1f}% ({baseline_metrics['disk_used_gb']:.1f}GB / {baseline_metrics['disk_total_gb']:.1f}GB)")

print("\n🏥 Service Health Status:")
print("=" * 50)
for service, status in health_status.items():
    if 'error' in status:
        print(f"• {service}: ❌ {status['error']}")
    else:
        print(f"• {service}: ✅ {status['status']} (uptime: {status['uptime']:.1f}s)")

# Check if services are ready for benchmarking
services_ready = all('error' not in status for status in health_status.values())
if not services_ready:
    print("\n⚠️  Warning: Some services are not healthy. Benchmark results may be affected.")
else:
    print("\n✅ All services are healthy and ready for benchmarking.")
