# StarCoder Multi-Agent System - Advanced Tutorial

This notebook demonstrates advanced features of the StarCoder Multi-Agent System including:

1. Batch processing multiple tasks
2. Error handling and recovery
3. Performance optimization
4. Custom orchestration patterns
5. Result analysis and visualization

## Prerequisites

- Complete the Basic Tutorial first
- All services running (StarCoder + Agents)
- Additional Python packages: `matplotlib`, `pandas`, `seaborn`


In [None]:
# Import required libraries
import asyncio
import sys
import json
import time
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from datetime import datetime
from typing import List, Dict, Any

# Add project root to Python path
sys.path.insert(0, str(Path.cwd().parent))

from orchestrator import MultiAgentOrchestrator
from communication.message_schema import OrchestratorRequest
from exceptions import CodeGenerationError, CodeReviewError

# Set up plotting style
# The 'seaborn-v0_8' style sheet only exists in matplotlib >= 3.6; older
# releases ship the same style under the name 'seaborn'. Pick whichever is
# installed so the notebook does not abort with OSError on import.
_plot_style = 'seaborn-v0_8'
if _plot_style not in plt.style.available:
    _plot_style = 'seaborn' if 'seaborn' in plt.style.available else 'default'
plt.style.use(_plot_style)
sns.set_palette("husl")

print("✅ Advanced libraries imported successfully!")
print(f"📁 Working directory: {Path.cwd()}")
print(f"🐍 Python version: {sys.version}")


## 1. Batch Processing

Let's process multiple tasks concurrently with `asyncio.gather` to demonstrate the efficiency gains over running them one at a time.


In [None]:
# Define multiple tasks for batch processing
batch_tasks = [
    {
        "description": "Create a sorting algorithm using quicksort",
        "requirements": ["Use recursive approach", "Include type hints", "Handle edge cases"]
    },
    {
        "description": "Implement a binary search algorithm",
        "requirements": ["Handle edge cases", "Add comprehensive tests", "Optimize for performance"]
    },
    {
        "description": "Create a simple hash table implementation",
        "requirements": ["Use chaining for collision resolution", "Include resize functionality", "Add load factor monitoring"]
    },
    {
        "description": "Write a function to find the longest common subsequence",
        "requirements": ["Use dynamic programming", "Optimize for space", "Include memoization"]
    },
    {
        "description": "Implement a basic graph traversal (BFS)",
        "requirements": ["Use adjacency list representation", "Handle disconnected graphs", "Return shortest path"]
    }
]

print(f"📝 Processing {len(batch_tasks)} tasks in batch:")
for i, task in enumerate(batch_tasks, 1):
    print(f"   {i}. {task['description']}")

# Initialize orchestrator
orchestrator = MultiAgentOrchestrator()

# Process tasks in parallel
print("\n⏳ Processing tasks in parallel...")
start_time = time.time()

try:
    # Create requests
    requests = []
    for task_data in batch_tasks:
        request = OrchestratorRequest(
            task_description=task_data["description"],
            language="python",
            requirements=task_data["requirements"]
        )
        requests.append(request)
    
    # Process all tasks concurrently
    results = await asyncio.gather(*[orchestrator.process_task(req) for req in requests], return_exceptions=True)
    
    end_time = time.time()
    total_time = end_time - start_time
    
    print(f"✅ Batch processing completed in {total_time:.2f}s")
    
except Exception as e:
    print(f"❌ Batch processing failed: {e}")
    results = []
finally:
    await orchestrator.close()


In [None]:
# Analyze batch processing results: sort each gathered outcome into the
# success or failure bucket, then print summary statistics.
successful_results, failed_results = [], []

for task_spec, result in zip(batch_tasks, results):
    description = task_spec["description"]

    # An exception object returned by gather() means the task raised.
    if isinstance(result, Exception):
        failed_results.append({"task": description, "error": str(result)})
        continue

    # The workflow ran but reported that it did not succeed.
    if not result.success:
        failed_results.append({"task": description, "error": result.error_message})
        continue

    successful_results.append({
        "task": description,
        "workflow_time": result.workflow_time,
        "quality_score": result.review_result.code_quality_score,
        "lines_of_code": result.generation_result.metadata.lines_of_code,
        "complexity": result.generation_result.metadata.complexity,
        "tokens_used": result.generation_result.tokens_used,
    })

task_count = len(batch_tasks)
success_count = len(successful_results)
failure_count = len(failed_results)

print("📊 Batch Processing Results:")
print("=" * 50)
print(f"• Total tasks: {task_count}")
print(f"• Successful: {success_count}")
print(f"• Failed: {failure_count}")
print(f"• Success rate: {success_count/task_count*100:.1f}%")
print(f"• Total time: {total_time:.2f}s")
print(f"• Average time per task: {total_time/task_count:.2f}s")

# Aggregate metrics only make sense when at least one task succeeded.
if successful_results:
    quality_sum = sum(entry["quality_score"] for entry in successful_results)
    time_sum = sum(entry["workflow_time"] for entry in successful_results)
    total_tokens = sum(entry["tokens_used"] for entry in successful_results)
    avg_quality = quality_sum / success_count
    avg_time = time_sum / success_count

    print(f"• Average quality score: {avg_quality:.1f}/10")
    print(f"• Average workflow time: {avg_time:.2f}s")
    print(f"• Total tokens used: {total_tokens}")

# Display failed tasks
if failed_results:
    print("\n❌ Failed Tasks:")
    for failed in failed_results:
        print(f"   • {failed['task']}: {failed['error']}")


In [None]:
# Create visualizations for batch processing results.
# Draws a 2x3 grid: four per-task bar charts, a pie chart of the complexity
# values, and a time-vs-quality scatter coloured by lines of code.
if not successful_results:
    print("❌ No successful results to visualize")
else:
    df = pd.DataFrame(successful_results)
    task_positions = range(len(df))

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('Batch Processing Analysis', fontsize=16)

    # (axis, dataframe column, bar colour, title, y-axis label)
    bar_panels = [
        (axes[0, 0], 'workflow_time', 'skyblue', 'Workflow Time by Task', 'Time (seconds)'),
        (axes[0, 1], 'quality_score', 'lightgreen', 'Quality Score by Task', 'Quality Score (/10)'),
        (axes[0, 2], 'lines_of_code', 'orange', 'Lines of Code by Task', 'Lines of Code'),
        (axes[1, 2], 'tokens_used', 'purple', 'Token Usage by Task', 'Tokens Used'),
    ]
    for panel, column, bar_color, panel_title, y_label in bar_panels:
        panel.bar(task_positions, df[column], color=bar_color)
        panel.set_title(panel_title)
        panel.set_xlabel('Task Index')
        panel.set_ylabel(y_label)

    # Per-panel tweaks that do not apply to every bar chart
    axes[0, 0].tick_params(axis='x', rotation=45)
    axes[0, 1].set_ylim(0, 10)

    # Complexity distribution
    complexity_counts = df['complexity'].value_counts()
    pie_panel = axes[1, 0]
    pie_panel.pie(
        complexity_counts.values,
        labels=complexity_counts.index,
        autopct='%1.1f%%',
    )
    pie_panel.set_title('Complexity Distribution')

    # Time vs Quality scatter, coloured by lines of code
    scatter_panel = axes[1, 1]
    scatter_panel.scatter(
        df['workflow_time'],
        df['quality_score'],
        c=df['lines_of_code'],
        cmap='viridis',
        s=100,
    )
    scatter_panel.set_title('Time vs Quality Score')
    scatter_panel.set_xlabel('Workflow Time (s)')
    scatter_panel.set_ylabel('Quality Score (/10)')

    plt.tight_layout()
    plt.show()

    # Display detailed results table
    print("\n📋 Detailed Results:")
    print("=" * 80)
    table_columns = ['task', 'workflow_time', 'quality_score', 'lines_of_code', 'complexity', 'tokens_used']
    display_df = df[table_columns]
    print(display_df.to_string(index=False))
