In [None]:
# =============================================================================
# CELL 1: Setup and Installation
# =============================================================================
import os
import sys

# Detect environment
IN_COLAB = 'google.colab' in sys.modules
IN_KAGGLE = os.path.exists('/kaggle')

print(f"Environment: {'Colab' if IN_COLAB else 'Kaggle' if IN_KAGGLE else 'Local'}")

if IN_COLAB:
    # Mount Google Drive for saving results
    from google.colab import drive
    drive.mount('/content/drive')
    
    # Clone the repository
    !git clone https://github.com/elonmj/Code-traffic-flow.git 2>/dev/null || echo "Repo already exists"
    %cd Code-traffic-flow
    
    # Install dependencies
    !pip install -q stable-baselines3 gymnasium numba cupy-cuda12x
    
    PROJECT_ROOT = '/content/Code-traffic-flow'
    OUTPUT_DIR = '/content/drive/MyDrive/thesis_rl_results'
elif IN_KAGGLE:
    !pip install -q stable-baselines3 gymnasium
    PROJECT_ROOT = '/kaggle/input/code-traffic-flow'
    OUTPUT_DIR = '/kaggle/working/thesis_rl_results'
else:
    PROJECT_ROOT = os.getcwd()
    OUTPUT_DIR = os.path.join(PROJECT_ROOT, 'results', 'thesis_rl_results')

sys.path.insert(0, PROJECT_ROOT)
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"Output directory: {OUTPUT_DIR}")

In [None]:
# =============================================================================
# CELL 2: Imports and Configuration
# =============================================================================
import json
import time
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# RL imports
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import BaseCallback

# Project imports
from Code_RL.src.env.traffic_signal_env_direct_v3 import TrafficSignalEnvDirectV3
from arz_model.config import create_victoria_island_config

print("✅ All imports successful!")

# =============================================================================
# CONFIGURATION
# =============================================================================
# Single source of truth for the experiment: every later cell reads its
# training, scenario, reward and DQN settings from this dictionary.
CONFIG = {
    # Training
    'timesteps': 5000,           # 5000 steps for quick demo
    'eval_episodes': 5,          # Evaluation episodes

    # Traffic scenario (CONGESTED)
    'default_density': 120.0,    # veh/km - Rush hour
    'inflow_density': 180.0,     # veh/km - High inflow
    't_final': 450.0,            # Simulation time (seconds)
    'decision_interval': 15.0,   # Decision interval (seconds)

    # Reward weights (optimized for congestion)
    'alpha': 5.0,   # Density penalty (HIGH)
    'kappa': 0.3,   # Switch penalty
    'mu': 0.1,      # Throughput reward (LOW)

    # DQN hyperparameters
    'learning_rate': 1e-4,
    'buffer_size': 10000,
    'learning_starts': 500,
    'batch_size': 64,
    'gamma': 0.99,
    'exploration_fraction': 0.3,
    'exploration_final_eps': 0.05,
}

# Human-readable recap of the key settings (symbols repaired from mis-encoded text).
print("\n📊 Configuration:")
print(f"   Timesteps: {CONFIG['timesteps']}")
print(f"   Density: {CONFIG['default_density']} → {CONFIG['inflow_density']} veh/km")
print(f"   Reward weights: α={CONFIG['alpha']}, κ={CONFIG['kappa']}, μ={CONFIG['mu']}")

In [None]:
# =============================================================================
# CELL 3: Environment Factory
# =============================================================================
def create_env(config=CONFIG, quiet=True):
    """Build a `TrafficSignalEnvDirectV3` for the scenario described by `config`.

    Args:
        config: Mapping providing 't_final', 'decision_interval',
            'default_density', 'inflow_density' and the reward weights
            'alpha', 'kappa' and 'mu'. Defaults to the notebook-level CONFIG.
        quiet: Forwarded unchanged to the environment constructor.

    Returns:
        The newly constructed environment instance.
    """
    step_seconds = config['decision_interval']

    # Simulation configuration: the output step is aligned with the decision interval.
    sim_config = create_victoria_island_config(
        t_final=config['t_final'],
        output_dt=step_seconds,
        cells_per_100m=4,
        default_density=config['default_density'],
        inflow_density=config['inflow_density'],
        use_cache=False,
    )

    # Attach RL metadata listing every segment id of the generated network.
    segment_ids = [segment.id for segment in sim_config.segments]
    sim_config.rl_metadata = {
        'observation_segment_ids': segment_ids,
        'decision_interval': step_seconds,
    }

    weights = {name: config[name] for name in ('alpha', 'kappa', 'mu')}
    return TrafficSignalEnvDirectV3(
        simulation_config=sim_config,
        decision_interval=step_seconds,
        observation_segment_ids=None,
        reward_weights=weights,
        quiet=quiet,
    )

# Smoke-test the factory: build one environment, reset it and report its spaces.
print("Creating test environment...")
test_env = create_env(quiet=True)
try:
    obs, _ = test_env.reset()
    print("✅ Environment created!")
    print(f"   Observation shape: {obs.shape}")
    print(f"   Action space: {test_env.action_space}")
finally:
    # Release the environment even when reset() or the prints fail.
    test_env.close()

In [None]:
# =============================================================================
# CELL 4: Training Callback for Logging
# =============================================================================
class TrainingCallback(BaseCallback):
    """Record per-episode rewards and lengths while a model is training.

    Only the first entry of the `rewards` and `dones` sequences found in
    `self.locals` is read, so the bookkeeping follows a single environment.

    Attributes populated during training:
        episode_rewards: total reward of each finished episode (built-in floats).
        episode_lengths: number of steps of each finished episode.
        timestep_rewards: snapshots of the running episode reward, taken
            whenever `num_timesteps` is a multiple of 100.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_rewards = []
        self.episode_lengths = []
        self.current_rewards = 0.0
        self.current_length = 0
        self.timestep_rewards = []  # For plotting

    def _on_step(self) -> bool:
        """Update the running totals after one step; always lets training continue."""
        # Convert to a built-in float: the value may be a NumPy scalar, and
        # accumulating those can leave np.float32 totals that `json` refuses
        # to serialize when the episode rewards are saved later.
        reward = float(self.locals.get('rewards', [0])[0])
        self.current_rewards += reward
        self.current_length += 1

        # Save every 100 steps for plotting
        if self.num_timesteps % 100 == 0:
            self.timestep_rewards.append({
                'timestep': self.num_timesteps,
                'reward': self.current_rewards
            })

        # On episode end, archive the totals and start a fresh episode.
        done = bool(self.locals.get('dones', [False])[0])
        if done:
            self.episode_rewards.append(self.current_rewards)
            self.episode_lengths.append(self.current_length)
            self.current_rewards = 0.0
            self.current_length = 0

        return True

print("✅ Callback defined")

In [None]:
# =============================================================================
# CELL 5: Policy Evaluation Helper and Baseline Evaluation (Random Policy)
# =============================================================================
def evaluate_policy(env, policy_type='random', model=None, n_episodes=5,
                   fixed_interval=30.0):
    """Run `n_episodes` episodes on `env` and summarise the rewards obtained.

    Args:
        env: Environment exposing `reset()`, `step(action)` (five-value return),
            `action_space.sample()` and, for the fixed-time policy,
            `decision_interval`.
        policy_type: 'model' queries `model.predict(obs, deterministic=True)`;
            'fixed_time' emits action 1 once at least `fixed_interval` seconds
            have accumulated since the last 1 (action 0 otherwise); any other
            value samples random actions from the action space.
        model: Object with a `predict` method; required when policy_type='model'.
        n_episodes: Number of episodes to run.
        fixed_interval: Period in seconds used by the 'fixed_time' policy.

    Returns:
        dict with 'mean_reward', 'std_reward', 'mean_density' (built-in floats)
        and 'all_rewards' (list of per-episode totals as built-in floats).
        An episode whose `info` never contains 'avg_density' contributes 0 to
        the mean density.

    Raises:
        ValueError: if policy_type='model' is requested without a model.
    """
    if policy_type == 'model' and model is None:
        raise ValueError("policy_type='model' requires a trained `model`")

    episode_rewards = []
    episode_densities = []

    for _ in range(n_episodes):
        obs, _ = env.reset()
        done = truncated = False
        ep_reward = 0.0
        ep_densities = []
        time_since_switch = 0.0

        while not (done or truncated):
            if policy_type == 'model':
                action, _ = model.predict(obs, deterministic=True)
            elif policy_type == 'fixed_time':
                time_since_switch += env.decision_interval
                action = 1 if time_since_switch >= fixed_interval else 0
                if action == 1:
                    time_since_switch = 0.0
            else:  # random
                action = env.action_space.sample()

            obs, reward, done, truncated, info = env.step(action)
            # Cast to a built-in float: an environment may return a NumPy
            # scalar, and such totals are not JSON-serializable later on.
            ep_reward += float(reward)
            if 'avg_density' in info:
                ep_densities.append(info['avg_density'])

        episode_rewards.append(ep_reward)
        episode_densities.append(np.mean(ep_densities) if ep_densities else 0)

    return {
        'mean_reward': float(np.mean(episode_rewards)),
        'std_reward': float(np.std(episode_rewards)),
        'mean_density': float(np.mean(episode_densities)),
        'all_rewards': episode_rewards
    }

# Evaluate the random-action baseline on a dedicated environment.
print("📊 Evaluating baseline (Random policy)...")
baseline_env = create_env(quiet=True)
try:
    baseline_results = evaluate_policy(baseline_env, policy_type='random',
                                       n_episodes=CONFIG['eval_episodes'])
finally:
    # Close the environment even if the evaluation raises.
    baseline_env.close()

print("\n✅ Baseline Results:")
print(f"   Mean Reward: {baseline_results['mean_reward']:.2f} ± {baseline_results['std_reward']:.2f}")
print(f"   Mean Density: {baseline_results['mean_density']:.4f}")

In [None]:
# =============================================================================
# CELL 6: DQN Training
# =============================================================================
print("="*60)
print("🚀 STARTING DQN TRAINING")
print("="*60)
print(f"Timesteps: {CONFIG['timesteps']}")
print(f"Learning rate: {CONFIG['learning_rate']}")

# Create environment (kept open: the next cell evaluates the agent on it)
train_env = create_env(quiet=True)

# Create model; every hyperparameter comes from CONFIG
model = DQN(
    "MlpPolicy",
    train_env,
    learning_rate=CONFIG['learning_rate'],
    buffer_size=CONFIG['buffer_size'],
    learning_starts=CONFIG['learning_starts'],
    batch_size=CONFIG['batch_size'],
    gamma=CONFIG['gamma'],
    exploration_fraction=CONFIG['exploration_fraction'],
    exploration_final_eps=CONFIG['exploration_final_eps'],
    verbose=0  # Quiet mode
)

# Create callback that records per-episode rewards for the training curve
callback = TrainingCallback()

# Train and measure wall-clock duration
start_time = time.time()
model.learn(total_timesteps=CONFIG['timesteps'], callback=callback, progress_bar=True)
training_time = time.time() - start_time

print(f"\n✅ Training completed in {training_time:.1f}s ({training_time/60:.1f} min)")
print(f"   Episodes completed: {len(callback.episode_rewards)}")
if callback.episode_rewards:
    print(f"   Final episode reward: {callback.episode_rewards[-1]:.2f}")

In [None]:
# =============================================================================
# CELL 7: Evaluate Trained Agent
# =============================================================================
print("📊 Evaluating trained agent...")

# Evaluate the greedy policy on the training environment, then release it.
try:
    rl_results = evaluate_policy(train_env, policy_type='model', model=model,
                                n_episodes=CONFIG['eval_episodes'])
finally:
    train_env.close()

# Relative improvement over the baseline, in percent. A baseline mean of
# exactly 0 has no meaningful relative change, so report NaN instead of
# raising ZeroDivisionError.
baseline_mean = baseline_results['mean_reward']
if baseline_mean != 0:
    improvement = ((rl_results['mean_reward'] - baseline_mean)
                   / abs(baseline_mean) * 100)
else:
    improvement = float('nan')

print("\n" + "="*60)
print("📈 RESULTS COMPARISON")
print("="*60)
print(f"Baseline (Random):  {baseline_results['mean_reward']:.2f} ± {baseline_results['std_reward']:.2f}")
print(f"DQN Agent:          {rl_results['mean_reward']:.2f} ± {rl_results['std_reward']:.2f}")
print(f"Improvement:        {improvement:+.2f}%")
print("="*60)

In [None]:
# =============================================================================
# CELL 8: Generate Figures for Thesis
# =============================================================================
print("📊 Generating thesis figures...")

# Figure 1: Training Curve (reward of each completed training episode)
fig1, ax1 = plt.subplots(figsize=(10, 6))

if callback.episode_rewards:
    episodes = range(1, len(callback.episode_rewards) + 1)
    ax1.plot(episodes, callback.episode_rewards, 'b-', linewidth=2, label='Episode Reward')

    # Moving average over up to 5 episodes; each point is plotted at the last
    # episode of its window.
    if len(callback.episode_rewards) >= 3:
        window = min(5, len(callback.episode_rewards))
        ma = np.convolve(callback.episode_rewards, np.ones(window)/window, mode='valid')
        ax1.plot(range(window, len(callback.episode_rewards)+1), ma, 'r-',
                linewidth=2, label=f'Moving Avg ({window} ep)')

    # Only build a legend when something labelled was drawn; calling legend()
    # on an empty axes just emits a warning.
    ax1.legend()

ax1.set_xlabel('Episode', fontsize=12)
ax1.set_ylabel('Cumulative Reward', fontsize=12)
ax1.set_title('DQN Training Progress - Traffic Signal Control', fontsize=14)
ax1.grid(True, alpha=0.3)

fig1_path = os.path.join(OUTPUT_DIR, 'fig_8_training_curve.png')
fig1.savefig(fig1_path, dpi=150, bbox_inches='tight')
print(f"✅ Saved: {fig1_path}")
plt.show()

# Figure 2: Comparison Bar Chart (mean evaluation reward, std as error bars)
fig2, ax2 = plt.subplots(figsize=(10, 6))

methods = ['Random\n(Baseline)', 'DQN Agent']
rewards = [baseline_results['mean_reward'], rl_results['mean_reward']]
stds = [baseline_results['std_reward'], rl_results['std_reward']]
colors = ['#ff7f0e', '#2ca02c']

bars = ax2.bar(methods, rewards, yerr=stds, capsize=5, color=colors, edgecolor='black')
ax2.set_ylabel('Mean Cumulative Reward', fontsize=12)
ax2.set_title(f'Performance Comparison: DQN vs Baseline\n(Improvement: {improvement:+.1f}%)', fontsize=14)
ax2.grid(True, axis='y', alpha=0.3)

# Add value labels just past the error-bar tip, on the side the bar points to.
# The offset is expressed in points rather than data units, so it stays
# readable whatever the reward scale and also works for negative rewards.
for bar, val, err in zip(bars, rewards, stds):
    points_up = val >= 0
    tip = (val + err) if points_up else (val - err)
    ax2.annotate(
        f'{val:.1f}',
        xy=(bar.get_x() + bar.get_width()/2, tip),
        xytext=(0, 4 if points_up else -4),
        textcoords='offset points',
        ha='center', va='bottom' if points_up else 'top', fontsize=11,
    )

fig2_path = os.path.join(OUTPUT_DIR, 'fig_8_comparison.png')
fig2.savefig(fig2_path, dpi=150, bbox_inches='tight')
print(f"✅ Saved: {fig2_path}")
plt.show()

In [None]:
# =============================================================================
# CELL 9: Save Results and Model
# =============================================================================
print("💾 Saving results and model...")


def _json_default(value):
    """Fallback for json.dump: turn NumPy scalars/arrays into built-in types."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Save model (the file on disk is 'dqn_traffic_signal.zip', as listed in the
# download cell, so report the path with its extension)
model_path = os.path.join(OUTPUT_DIR, 'dqn_traffic_signal')
model.save(model_path)
print(f"✅ Model saved: {model_path}.zip")

# Save results JSON
results_summary = {
    'config': CONFIG,
    'baseline': baseline_results,
    'dqn': rl_results,
    'improvement_percent': improvement,
    'training_time_seconds': training_time,
    'training_episodes': len(callback.episode_rewards),
    'episode_rewards': callback.episode_rewards,
}

results_path = os.path.join(OUTPUT_DIR, 'results_summary.json')
with open(results_path, 'w', encoding='utf-8') as results_file:
    # `default` keeps the dump from failing if any reward is still a NumPy value.
    json.dump(results_summary, results_file, indent=2, default=_json_default)
print(f"✅ Results saved: {results_path}")

# List all files with their size in KB
print("\n📁 Files in output directory:")
for entry in sorted(os.listdir(OUTPUT_DIR)):
    entry_path = os.path.join(OUTPUT_DIR, entry)
    size = os.path.getsize(entry_path) / 1024
    print(f"   {entry} ({size:.1f} KB)")

In [None]:
# =============================================================================
# CELL 10: Download Results (Colab Only)
# =============================================================================
if IN_COLAB:
    # Colab-only helper; imported here because it does not exist elsewhere.
    from google.colab import files
    import shutil

    # Create ZIP archive of the whole output directory; make_archive returns
    # the path of the file it wrote ('/content/thesis_rl_results.zip').
    zip_name = 'thesis_rl_results'
    archive_path = shutil.make_archive(f'/content/{zip_name}', 'zip', OUTPUT_DIR)

    print("\n📥 Downloading results...")
    print("   (A download dialog should appear)")

    # Download ZIP
    files.download(archive_path)

    print("\n✅ Download initiated!")
    print("   Archive contains: fig_8_training_curve.png, fig_8_comparison.png,")
    print("   results_summary.json, dqn_traffic_signal.zip")
else:
    print(f"\n📁 Results saved locally in: {OUTPUT_DIR}")
    print("   Copy figures to: images/chapter3/")

## 📊 Résumé Final

### Résultats de l'entraînement:
- **Timesteps**: 5000
- **Baseline (Random)**: Voir cellule 7
- **DQN Agent**: Voir cellule 7
- **Amélioration**: Voir cellule 7

### Fichiers générés:
1. `fig_8_training_curve.png` - Courbe d'apprentissage
2. `fig_8_comparison.png` - Comparaison baseline vs DQN
3. `results_summary.json` - Données numériques
4. `dqn_traffic_signal.zip` - Modèle entraîné

### Pour la thèse:
Copiez les figures PNG dans `images/chapter3/` et mettez à jour `section8_evaluation_rl.tex`.