In [1]:
# 🔧 PRECONDITION CHECKS
# Verify environment setup and dependencies
#
# Each hard requirement (project files, core dependencies) records its outcome
# so the final status line reflects what was actually found instead of always
# claiming the system is ready. The Gemini key is optional: without it the
# cell only warns.

import os
import sys
from pathlib import Path

print("🔍 ENVIRONMENT VALIDATION")
print("=" * 50)

# Check Python environment
print(f"🐍 Python: {sys.version.split()[0]}")
print(f"📂 Working Directory: {os.getcwd()}")

# Check required files (paths are resolved against the working directory above)
required_files = ['config.yaml', 'run.py', 'auction_env.py']
missing_files = []
for file in required_files:
    if Path(file).exists():
        print(f"✅ {file}: Found")
    else:
        missing_files.append(file)
        print(f"❌ {file}: Missing")

# Check dependencies; the first failing import aborts the rest of the group
try:
    import numpy as np
    import matplotlib.pyplot as plt
    import pandas as pd
    import gymnasium as gym
    import torch
    dependencies_ok = True
    print("✅ Core Dependencies: Installed")
except ImportError as e:
    dependencies_ok = False
    print(f"❌ Dependencies: Missing - {e}")

# Check LLM capability (only presence is reported; the key itself is never printed)
gemini_key = os.getenv("GEMINI_API_KEY")
if gemini_key and gemini_key != "your_api_key_here":
    print("✅ Gemini API: Configured")
else:
    print("⚠️  Gemini API: Not configured (will use heuristic seller)")

# Create reports directory
Path("reports").mkdir(exist_ok=True)
print("✅ Reports Directory: Ready")

# Only claim readiness when every hard requirement passed
environment_ready = dependencies_ok and not missing_files
if environment_ready:
    print("\n🎯 SYSTEM STATUS: Ready for phase execution")
else:
    print("\n🛑 SYSTEM STATUS: Not ready - resolve the ❌ items above before running any phase")
print("💡 Tip: Each phase generates detailed reports in ./reports/")


🔍 ENVIRONMENT VALIDATION
🐍 Python: 3.10.18
📂 Working Directory: /Users/garychen/Desktop/auction_simulator
✅ config.yaml: Found
✅ run.py: Found
✅ auction_env.py: Found
✅ Core Dependencies: Installed
✅ Gemini API: Configured
✅ Reports Directory: Ready

🎯 SYSTEM STATUS: Ready for phase execution
💡 Tip: Each phase generates detailed reports in ./reports/


In [2]:
# 🔍 PHASE 0: FOUNDATION & VALIDATION
# Smoke test: launch `run.py --phase 0` in a child process, echo everything it
# wrote to stdout/stderr, and report success or failure from its exit code.

import subprocess
import sys

print(
    "🚀 EXECUTING PHASE 0: Foundation & Validation",
    "=" * 60,
    "📋 Objective: Verify environment stability and action parsing",
    "⏱️  Duration: ~10 seconds",
    "",
    sep="\n",
)

# Same interpreter as the kernel, so the child sees the same installed packages
phase0_command = [sys.executable, "run.py", "--phase", "0"]

try:
    # Both streams are captured as text; the child is killed after 30 seconds
    result = subprocess.run(
        phase0_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        timeout=30,
    )

    print("📊 EXECUTION OUTPUT:", "-" * 40, result.stdout, sep="\n")

    if result.stderr:
        print("⚠️  Warnings/Errors:", result.stderr, sep="\n")

    if result.returncode != 0:
        print(f"\n❌ PHASE 0 STATUS: FAILED (exit code {result.returncode})")
    else:
        print(
            "\n✅ PHASE 0 STATUS: SUCCESS",
            "🎯 Key Validation: Environment loop, action parsing, episode completion",
            "📈 Next Step: Ready for Phase 1 (Monte Carlo baseline)",
            sep="\n",
        )

except subprocess.TimeoutExpired:
    print("⏰ TIMEOUT: Phase 0 execution exceeded 30 seconds")
except Exception as exc:
    # Anything else (e.g. the interpreter could not be launched) is reported, not raised
    print(f"💥 ERROR: {exc}")

print("\n📄 Reports: Phase 0 generates console output only (no file report)")


🚀 EXECUTING PHASE 0: Foundation & Validation
📋 Objective: Verify environment stability and action parsing
⏱️  Duration: ~10 seconds

📊 EXECUTION OUTPUT:
----------------------------------------
✅ Environment variables loaded from .env file

INFO:__main__:🚀 Running Phase 0: Smoke Test (1 heuristic episode)
INFO:__main__:
INFO:__main__:🏠              AUCTION EPISODE 0 STARTING              🏠
INFO:__main__:🤖 POLICY TYPE: Heuristic
INFO:__main__:💰 STARTING PRICE: $8,000
INFO:__main__:📊 RESERVE PRICE: $9,800
INFO:__main__:👥 BUYERS: 5 personas with different strategies
INFO:__main__:
INFO:__main__:🏠 ROUND 1
INFO:__main__:💰 Current Price: $9,000
INFO:__main__:👥 Active Buyers: 4/5
INFO:__main__:🎯 Leading Bidder: B2_AGGRESSIVE_TRADER
INFO:__main__:📊 Reserve Price: $9,800 ❌ NOT MET
INFO:__main__:
🤖 SELLER ACTION: [ANNOUNCE (continue auction)]
INFO:__main__:💬 "Heuristic seller decision"
INFO:__main__:
👥 BUYER ACTIONS:
INFO:__main__:💵 NEW BIDS:
INFO:__main__:   • B2_AGGRESSIVE_TRADER: $9,000
INFO:

In [6]:
# 📊 PHASE 1: MONTE CARLO BASELINE
# Generate statistical baseline with heuristic policies
#
# Runs `run.py --phase 1` in a child process, then prints the tail of its
# stdout and stderr, its exit status, and whether the analysis report exists.

# Configuration
EPISODES = 100000  # Adjust this for different baseline sizes (50-1000+)
STDOUT_TAIL_LINES = 20  # Trailing stdout lines shown in the summary
STDERR_TAIL_LINES = 10  # Trailing stderr lines shown in the summary

print("🚀 EXECUTING PHASE 1: Monte Carlo Baseline")
print("=" * 60)
print("📋 Objective: Establish heuristic baseline with comprehensive analytics")
print(f"📊 Episodes: {EPISODES:,}")
print("⏱️  Duration: ~1-2 minutes (depending on episode count)")
print("📈 Metrics: Allocative efficiency, revenue efficiency, persona performance")
print()

# Execute Phase 1
import subprocess
import sys
from pathlib import Path

try:
    result = subprocess.run(
        [sys.executable, "run.py", "--phase", "1", "--episodes", str(EPISODES)],
        capture_output=True,
        text=True,
        timeout=600  # 10 minutes max
    )

    print("📊 EXECUTION SUMMARY:")
    print("-" * 40)
    # Show only the tail of stdout; the full log is too long for 100k episodes
    output_lines = result.stdout.strip().splitlines()
    for line in output_lines[-STDOUT_TAIL_LINES:]:
        print(line)

    # Tail stderr by whole lines: a fixed character slice started the excerpt
    # in the middle of a log record.
    stderr_lines = result.stderr.strip().splitlines()
    if stderr_lines:
        print("\n⚠️  Warnings:")
        for line in stderr_lines[-STDERR_TAIL_LINES:]:
            print(line)

    if result.returncode == 0:
        print("\n✅ PHASE 1 STATUS: SUCCESS")
        print("🎯 Key Deliverables:")
        print("   • Statistical baseline established")
        print("   • Persona behavioral patterns analyzed")
        print("   • Economic efficiency metrics calculated")
        print("📊 Detailed Report: ./reports/phase1_analysis.md")
        print("📈 Next Step: Ready for Phase 2 (RL training)")

        # Check if report was generated
        if Path("reports/phase1_analysis.md").exists():
            print("✅ Analysis report generated successfully")
        else:
            print("⚠️  Analysis report not found")

    else:
        print(f"\n❌ PHASE 1 STATUS: FAILED (exit code {result.returncode})")

except subprocess.TimeoutExpired:
    print("⏰ TIMEOUT: Phase 1 execution exceeded 10 minutes")
    print("💡 Tip: This may indicate performance issues or excessive episode count")
except Exception as e:
    # Anything else (e.g. the interpreter could not be launched) is reported, not raised
    print(f"💥 ERROR: {e}")

print("\n📄 PHASE 1 OUTPUTS:")
print("   📊 ./reports/phase1_analysis.md - Comprehensive statistical analysis")
print("   📈 ./reports/phase1_results.csv - Raw episode data")
print("   📋 Console output - Episode summaries and progress")


🚀 EXECUTING PHASE 1: Monte Carlo Baseline
📋 Objective: Establish heuristic baseline with comprehensive analytics
📊 Episodes: 100,000
⏱️  Duration: ~1-2 minutes (depending on episode count)
📈 Metrics: Allocative efficiency, revenue efficiency, persona performance

📊 EXECUTION SUMMARY:
----------------------------------------
🏁 PHASE 1 MONTE CARLO ANALYSIS SUMMARY

📊 DATASET OVERVIEW:
   • Total Episodes: 100000
   • Success Rate: 100.0%
   • Reserve Met Rate: 100.0%

💰 FINANCIAL SUMMARY:
   • Average Price: $11,717
   • Price Volatility: $739
   • Total Economic Welfare: $4,451

⚡ EFFICIENCY METRICS:
   • Welfare Efficiency: 85.6%
   • Allocative Efficiency: 31.9%
   • Revenue Efficiency: 36.9%

🎯 KEY INSIGHTS:

/100000 (elapsed: 21.4s)
INFO:__main__:Episode 99950/100000 (elapsed: 21.4s)
INFO:__main__:Episode 99960/100000 (elapsed: 21.4s)
INFO:__main__:Episode 99970/100000 (elapsed: 21.4s)
INFO:__main__:Episode 99980/100000 (elapsed: 21.4s)
INFO:__main__:Episode 99990/100000 (elapsed: 2

In [3]:
# 🧠 PHASE 2: REINFORCEMENT LEARNING
# Train PPO agents and compare against heuristic baseline
#
# Runs `run.py --phase 2` in a child process, then prints the tail of its
# stdout and stderr, its exit status, and whether a report file exists.

# Imported here so the cell does not depend on names left in the kernel by
# whichever other cells happened to run first.
import subprocess
import sys
from pathlib import Path

# Configuration
TRAIN_EPISODES = 1000  # RL training episodes (500-2000+)
EVAL_EPISODES = 200    # Evaluation episodes (100-500)
STDOUT_TAIL_LINES = 25  # Trailing stdout lines shown in the summary
STDERR_TAIL_LINES = 10  # Trailing stderr lines shown in the summary

# A previous run logged "Generated comprehensive RL report: phase2_rl_report.md",
# so that name is checked first; the name this cell used originally is kept as
# a fallback.
# NOTE(review): the log prints only the bare file name; ./reports/ is assumed
# from the notebook's "reports in ./reports/" tip - confirm against run.py.
PHASE2_REPORT_CANDIDATES = [
    Path("reports/phase2_rl_report.md"),
    Path("reports/phase2_analysis.md"),
]

print("🚀 EXECUTING PHASE 2: Reinforcement Learning")
print("=" * 60)
print("📋 Objective: Train RL agents and benchmark against heuristic baseline")
print(f"🧠 Training Episodes: {TRAIN_EPISODES:,}")
print(f"📊 Evaluation Episodes: {EVAL_EPISODES:,}")
print("⏱️  Duration: ~10-30 minutes (depending on episode counts)")
print("🧠 Method: PPO with persona-aware reward shaping")
print("📈 Metrics: Learning curves, policy convergence, strategic improvement")
print()

# Execute Phase 2
try:
    result = subprocess.run(
        [sys.executable, "run.py", "--phase", "2",
         "--train-episodes", str(TRAIN_EPISODES),
         "--eval-episodes", str(EVAL_EPISODES)],
        capture_output=True,
        text=True,
        timeout=1800  # 30 minutes max
    )

    print("📊 EXECUTION SUMMARY:")
    print("-" * 40)
    # Show only the tail of stdout
    output_lines = result.stdout.strip().splitlines()
    for line in output_lines[-STDOUT_TAIL_LINES:]:
        print(line)

    # Tail stderr by whole lines so the excerpt never starts mid-record
    stderr_lines = result.stderr.strip().splitlines()
    if stderr_lines:
        print("\n⚠️  Warnings:")
        for line in stderr_lines[-STDERR_TAIL_LINES:]:
            print(line)

    if result.returncode == 0:
        # First candidate that exists on disk, or None when neither does
        report_path = next(
            (candidate for candidate in PHASE2_REPORT_CANDIDATES if candidate.exists()),
            None,
        )
        # When nothing was found, still point at the name the run log reported
        advertised_path = report_path if report_path is not None else PHASE2_REPORT_CANDIDATES[0]

        print("\n✅ PHASE 2 STATUS: SUCCESS")
        print("🎯 Key Deliverables:")
        print("   • RL agents trained and evaluated")
        print("   • Learning curves and convergence analysis")
        print("   • Comparative performance vs heuristic baseline")
        print("   • Strategic behavior evolution documented")
        print(f"📊 Detailed Report: ./{advertised_path.as_posix()}")
        print("🏆 Project Complete: Full multi-agent RL auction system")

        # Check if report was generated
        if report_path is not None:
            print("✅ RL analysis report generated successfully")
        else:
            print("⚠️  RL analysis report not found")

    else:
        print(f"\n❌ PHASE 2 STATUS: FAILED (exit code {result.returncode})")
        print("💡 Tip: RL training requires significant compute time and may need parameter tuning")

except subprocess.TimeoutExpired:
    print("⏰ TIMEOUT: Phase 2 execution exceeded 30 minutes")
    print("💡 Tip: RL training is compute-intensive. Consider reducing episode counts for testing.")
except Exception as e:
    # Anything else (e.g. the interpreter could not be launched) is reported, not raised
    print(f"💥 ERROR: {e}")


🚀 EXECUTING PHASE 2: Reinforcement Learning
📋 Objective: Train RL agents and benchmark against heuristic baseline
🧠 Training Episodes: 1,000
📊 Evaluation Episodes: 200
⏱️  Duration: ~10-30 minutes (depending on episode counts)
🧠 Method: PPO with persona-aware reward shaping
📈 Metrics: Learning curves, policy convergence, strategic improvement

📊 EXECUTION SUMMARY:
----------------------------------------
📁 Saved: phase2_plot2_price_distribution.png
📁 Saved: phase2_plot3_surplus_comparison.png
📁 Saved: phase2_plot4_learning_curves.png
📁 Saved: phase2_plot5_market_evolution.png
📁 Saved: phase2_plot6_efficiency_comparison.png
✅ Generated 6 individual visualization files
📄 Generated comprehensive RL report: phase2_rl_report.md

🎯 PHASE 2 RL ANALYSIS SUMMARY
📊 DATASET COMPARISON:
   • RL Training Episodes: 20.0 avg rounds
   • Success Rate Change: +0.0pp

🤖 RL TRAINING EFFECTIVENESS:
   • Agents Improving: 0/5
   • Avg Reward Change: +0.0

⚡ EFFICIENCY COMPARISON:
   • Allocative Efficiency