In [4]:
# Cell 1: Clone repo and install dependencies
import os

REPO_URL = "https://github.com/elonmj/Code-traffic-flow.git"
REPO_DIR = "/content/Code-traffic-flow"

if os.path.exists(REPO_DIR):
    print(f"Repository already exists at {REPO_DIR}")
    %cd {REPO_DIR}
    !git pull
else:
    !git clone {REPO_URL} {REPO_DIR}
    %cd {REPO_DIR}

!pip install stable-baselines3 gymnasium numba --quiet
print(f"‚úÖ Setup complete | Working dir: {os.getcwd()}")

Cloning into '/content/Code-traffic-flow'...
remote: Enumerating objects: 8994, done.[K
remote: Counting objects: 100% (222/222), done.[K
remote: Compressing objects: 100% (155/155), done.[K
remote: Total 8994 (delta 114), reused 160 (delta 61), pack-reused 8772 (from 2)[K
Receiving objects: 100% (8994/8994), 314.68 MiB | 16.06 MiB/s, done.
Resolving deltas: 100% (4610/4610), done.
/content/Code-traffic-flow
‚úÖ Setup complete | Working dir: /content/Code-traffic-flow


In [5]:
# Cell 2: Imports
import sys
sys.path.insert(0, REPO_DIR)

import numpy as np
import torch
import time
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import BaseCallback

from arz_model.config import create_victoria_island_config
from Code_RL.src.env.traffic_signal_env_direct_v3 import TrafficSignalEnvDirectV3

# Report the PyTorch build and, when a CUDA device is visible, its name.
_cuda_ok = torch.cuda.is_available()
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {_cuda_ok}")
if _cuda_ok:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

PyTorch: 2.9.0+cu126
CUDA: True
GPU: Tesla T4


In [6]:
# Cell 3: Configuration
# Densities forwarded to create_victoria_island_config() by create_env().
# NOTE(review): units are not visible in this notebook — confirm against arz_model.
DEFAULT_DENSITY = 120.0  # Congested scenario
INFLOW_DENSITY = 180.0
# Reward weights handed to the environment as `reward_weights`.
REWARD_WEIGHTS = dict(alpha=5.0, kappa=0.3, mu=0.1)

def create_env(quiet=True):
    """Build a fresh TrafficSignalEnvDirectV3 for the Victoria Island network.

    The simulation config is generated with the module-level DEFAULT_DENSITY
    and INFLOW_DENSITY, then handed directly to the environment together with
    REWARD_WEIGHTS.

    Args:
        quiet: Forwarded unchanged to the environment's ``quiet`` argument.

    Returns:
        A new TrafficSignalEnvDirectV3 instance.
    """
    # Single source for the decision interval used by both the RL metadata and
    # the environment constructor.
    decision_interval = 15.0

    config = create_victoria_island_config(
        t_final=450.0, output_dt=15.0, cells_per_100m=4,
        default_density=DEFAULT_DENSITY, inflow_density=INFLOW_DENSITY, use_cache=False
    )
    config.rl_metadata = {
        'observation_segment_ids': [s.id for s in config.segments],
        'decision_interval': decision_interval,
    }

    # The config object is passed as-is; the former SimpleConfig wrapper only
    # stored it and returned it again, so it had no effect.
    return TrafficSignalEnvDirectV3(
        simulation_config=config,
        decision_interval=decision_interval, reward_weights=REWARD_WEIGHTS, quiet=quiet
    )

print(f"‚úÖ Environment factory ready (density={DEFAULT_DENSITY}, inflow={INFLOW_DENSITY})")

‚úÖ Environment factory ready (density=120.0, inflow=180.0)


In [7]:
# Cell 4: Evaluate Baselines (Random, FT-30s, FT-60s, FT-90s)
# One environment instance is shared by every baseline evaluated in this cell.
env = create_env()
# Baseline name -> {'mean_reward': ..., 'std_reward': ...}
baseline_results = dict()

def eval_fixed_time(env, interval, n_ep=3):
    """Evaluate a fixed-time controller on ``env``.

    The controller adds ``env.decision_interval`` to a timer before each step.
    When the timer reaches ``interval`` it sends action 1 and resets the
    timer; otherwise it sends action 0.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` (Gymnasium
            5-tuple) and a ``decision_interval`` attribute.
        interval: Timer threshold at which action 1 is sent, in the same
            units as ``env.decision_interval``.
        n_ep: Number of episodes to run.

    Returns:
        Dict with ``'mean_reward'`` and ``'std_reward'`` (plain floats) over
        the per-episode returns.
    """
    rewards = []
    for _ in range(n_ep):
        env.reset()
        done, ep_r, t = False, 0.0, 0.0
        while not done:
            t += env.decision_interval
            if t >= interval:
                action, t = 1, 0.0
            else:
                action = 0
            _, r, terminated, truncated, _ = env.step(action)
            # An episode ends on either flag; checking only `terminated`
            # would loop forever on an environment that ends by truncation.
            done = bool(terminated or truncated)
            ep_r += r
        rewards.append(ep_r)
    return {'mean_reward': float(np.mean(rewards)), 'std_reward': float(np.std(rewards))}

# Random
print("üé≤ Random...")

def _random_episode_return(env, max_steps=30):
    """Run one random-action episode on ``env`` and return its summed reward.

    The episode is reset first, then stepped with sampled actions until the
    environment reports termination/truncation or ``max_steps`` is reached.
    The default of 30 matches the 450 s horizon / 15 s decision interval
    configured in create_env().
    """
    env.reset()
    total = 0.0
    for _ in range(max_steps):
        _, reward, terminated, truncated, _ = env.step(env.action_space.sample())
        total += reward
        if terminated or truncated:
            break
    return total

# Each episode is reset immediately before its own rollout. The previous
# nested comprehension performed all three resets up front and then stepped
# 90 times in a row without resetting in between.
rnd = [_random_episode_return(env) for _ in range(3)]
baseline_results['Random'] = {'mean_reward': float(np.mean(rnd)), 'std_reward': float(np.std(rnd))}

# Fixed-time baselines
for name, interval in [('FT-30s', 30), ('FT-60s', 60), ('FT-90s', 90)]:
    print(f"‚è±Ô∏è {name}...")
    baseline_results[name] = eval_fixed_time(env, interval)

# Summary, best mean reward first.
print("\nüìä BASELINES:")
for n, d in sorted(baseline_results.items(), key=lambda x: x[1]['mean_reward'], reverse=True):
    print(f"  {n:10s}: {d['mean_reward']:>8.1f} ¬± {d['std_reward']:.1f}")


üè≠ VICTORIA ISLAND CONFIG FACTORY - GLOBAL CONFIGURATION GENERATION
   üìä Loading topology from: /content/Code-traffic-flow/arz_model/data/fichier_de_travail_corridor_utf8.csv
üè≠ VICTORIA ISLAND CONFIG FACTORY - GLOBAL CONFIGURATION GENERATION
   üìä Loading topology from: /content/Code-traffic-flow/arz_model/data/fichier_de_travail_corridor_utf8.csv
   ‚úÖ Loaded 70 edges from topology
   üîó Building directed graph...
   ‚úÖ Graph built: 60 nodes, 70 edges
   üß† Analyzing network structure (global reflection)...
   ‚úÖ Network analysis complete:
      - Entry points: 4
      - Exit points: 4
      - Junctions: 15
      - Simple pass-through nodes: 37
   ‚úÖ Loaded 70 edges from topology
   üîó Building directed graph...
   ‚úÖ Graph built: 60 nodes, 70 edges
   üß† Analyzing network structure (global reflection)...
   ‚úÖ Network analysis complete:
      - Entry points: 4
      - Exit points: 4
      - Junctions: 15
      - Simple pass-through nodes: 37


  return datetime.utcnow().replace(tzinfo=utc)


   üö¶ Detected 8 signalized nodes from OSM data

   üîß Generating segment configurations...
   ‚úÖ Created 70 segment configurations

   üîß Generating node configurations...
   ‚úÖ Created 60 node configurations

   ‚öôÔ∏è  Setting up time and physics parameters...

   üî® Assembling complete network configuration...


   üîß Generating segment configurations...
   ‚úÖ Created 70 segment configurations

   üîß Generating node configurations...
   ‚úÖ Created 60 node configurations

   ‚öôÔ∏è  Setting up time and physics parameters...

   üî® Assembling complete network configuration...

‚úÖ CONFIGURATION GENERATION COMPLETE
   Total Segments: 70
   Total Nodes: 60
   Entry Points: 4
   Exit Points: 4
   Junctions: 15
   Signalized Nodes: 8
‚úÖ CONFIGURATION GENERATION COMPLETE
   Total Segments: 70
   Total Nodes: 60
   Entry Points: 4
   Exit Points: 4
   Junctions: 15
   Signalized Nodes: 8
   Simulation Duration: 450.0s (7.5 min)
   Grid Resolution: 4 cells/100m

[NETWORK 

  return datetime.utcnow().replace(tzinfo=utc)


   [10/70] segments created
   [20/70] segments created
   [30/70] segments created
   [40/70] segments created
   [50/70] segments created
   [60/70] segments created
   [70/70] segments created
[NETWORK BUILD] ‚úÖ Network construction complete!
   Total segments: 70
   Total nodes: 60
Finalizing network structure and validating topology...
‚úÖ Network topology is valid.


  return datetime.utcnow().replace(tzinfo=utc)


‚úÖ GPUMemoryPool initialized:
   - Segments: 70
   - Total cells: 795
   - Ghost cells: 3
   - Compute Capability: (6, 0)
   - CUDA streams: Enabled
   - GPU memory allocated: 12.00 MB


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


  - Preparing GPU topology for network coupling...
    - GPU topology prepared and transferred.
üé≤ Random...


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_bound



[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_boundary_phases_bulk] Updating 16 segments on GPU, 0 not found in pool
[DEBUG set_bound

In [None]:
# Cell 5: Progressive Training Callback
class ProgressCallback(BaseCallback):
    """Periodically evaluate the current policy against a reference reward.

    Every ``eval_freq`` calls to ``_on_step`` the model is rolled out
    deterministically for ``n_eval`` episodes on ``eval_env``. The mean
    episode return is compared with ``ref_reward`` and recorded.

    Args:
        eval_env: Environment used for the evaluation rollouts.
        ref_reward: Reference mean reward the policy is compared against.
        target_pct: Improvement over the reference, as a percentage of
            ``abs(ref_reward)``, at which ``reached`` becomes True.
        eval_freq: Evaluate every this many ``_on_step`` calls.
        n_eval: Number of evaluation episodes per evaluation.

    Attributes:
        target: Absolute reward corresponding to ``target_pct``.
        history: List of ``{'step', 'reward', 'improvement'}`` dicts, one per
            evaluation.
        best: Highest mean evaluation reward so far (``-inf`` before the
            first evaluation).
        reached: True once an evaluation met ``target_pct``.
    """

    def __init__(self, eval_env, ref_reward, target_pct=10.0, eval_freq=1000, n_eval=3):
        super().__init__()
        self.eval_env, self.ref = eval_env, ref_reward
        self.target_pct = target_pct
        # Offset by |ref| so the target lies above the reference even when the
        # reference reward is negative (same convention as `imp` below).
        self.target = ref_reward + abs(ref_reward) * target_pct / 100
        self.eval_freq, self.n_eval = eval_freq, n_eval
        self.history, self.best, self.reached = [], -np.inf, False

    def _run_eval_episode(self):
        """Roll out one deterministic episode on ``eval_env``; return its summed reward."""
        obs, _ = self.eval_env.reset()
        done, ep_r = False, 0
        while not done:
            a, _ = self.model.predict(obs, deterministic=True)
            obs, rew, terminated, truncated, _ = self.eval_env.step(a)
            # Stop on either flag; `terminated` alone misses truncated episodes.
            done = bool(terminated or truncated)
            ep_r += rew
        return ep_r

    def _on_step(self):
        """Run an evaluation every ``eval_freq`` calls; always returns True."""
        if self.n_calls % self.eval_freq != 0:
            return True

        rewards = [self._run_eval_episode() for _ in range(self.n_eval)]
        # Plain floats keep `history` JSON-serialisable whatever the env's reward dtype.
        mean_r = float(np.mean(rewards))
        imp = float(((mean_r - self.ref) / abs(self.ref)) * 100)
        self.history.append({'step': self.num_timesteps, 'reward': mean_r, 'improvement': imp})
        if mean_r > self.best:
            self.best = mean_r

        hit = imp >= self.target_pct
        status = "üéØ" if hit else ""
        print(f"  [{self.num_timesteps:>6}] R={mean_r:>7.1f} | vs FT-90s: {imp:>+5.1f}% {status}")
        if hit and not self.reached:
            self.reached = True
            print(f"\nüèÜ TARGET REACHED!")
        # Training is never interrupted from here; callers inspect `reached`.
        return True

# Reference reward for all improvement percentages: the FT-90s baseline mean.
FT90_REF = baseline_results['FT-90s']['mean_reward']
# +10% is measured against |FT90_REF| so the target is above the reference even
# when rewards are negative, matching the abs()-based improvement formula used
# in ProgressCallback._on_step.
FT90_TARGET = FT90_REF + abs(FT90_REF) * 0.10
print(f"‚úÖ Callback ready | Reference: FT-90s = {FT90_REF:.1f} | Target: {FT90_TARGET:.1f}")

In [None]:
# Cell 6: Initialize DQN Model
# Separate environment instances for training and for periodic evaluation.
train_env = create_env()
eval_env = create_env()

# tau=0.005 is a soft (Polyak) target update, which only makes sense when it is
# applied every step. SB3 applies it every `target_update_interval` environment
# steps (default 10000), which would leave the target network almost frozen,
# so the interval is set to 1 explicitly.
# NOTE(review): exploration_fraction is relative to the total_timesteps of each
# learn() call; the training cells call learn(10000, reset_num_timesteps=False)
# repeatedly — confirm the resulting epsilon schedule is the intended one.
model = DQN("MlpPolicy", train_env, learning_rate=1e-4, buffer_size=50000,
            learning_starts=1000, batch_size=64, tau=0.005, target_update_interval=1,
            gamma=0.99, exploration_fraction=0.3, exploration_final_eps=0.05,
            verbose=0, device='cuda' if torch.cuda.is_available() else 'cpu')

callback = ProgressCallback(eval_env, FT90_REF, target_pct=10.0, eval_freq=1000)
# Shared training state read and updated by the block cells.
STATE = {'model': model, 'callback': callback, 'steps': 0, 'block': 0, 'done': False}

print(f"üöÄ DQN ready on {model.device}")

In [None]:
# Block 1: Steps 0 ‚Üí 10,000
if not STATE['done']:
    print("üìä BLOCK 1: 0 ‚Üí 10k")
    # Train 10,000 more steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=1, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_10k")
    print(f"‚úÖ Block 1 done | Steps: {STATE['steps']} | Best: {STATE['callback'].best:.1f}")
else:
    print("‚úÖ Target reached, skipping")

In [None]:
# Block 2: Steps 10k ‚Üí 20k
if not STATE['done']:
    print("üìä BLOCK 2: 10k ‚Üí 20k")
    # Train 10,000 more steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=2, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_20k")
    print(f"‚úÖ Block 2 done | Steps: {STATE['steps']} | Best: {STATE['callback'].best:.1f}")
else:
    print("‚úÖ Target reached, skipping")

In [None]:
# Block 3: Steps 20k ‚Üí 30k
if not STATE['done']:
    print("üìä BLOCK 3: 20k ‚Üí 30k")
    # Train 10,000 more steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=3, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_30k")
    print(f"‚úÖ Block 3 done | Steps: {STATE['steps']} | Best: {STATE['callback'].best:.1f}")
else:
    print("‚úÖ Target reached, skipping")

In [None]:
# Block 4: Steps 30k ‚Üí 40k
if not STATE['done']:
    print("üìä BLOCK 4: 30k ‚Üí 40k")
    # Train 10,000 more steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=4, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_40k")
    print(f"‚úÖ Block 4 done | Steps: {STATE['steps']} | Best: {STATE['callback'].best:.1f}")
else:
    print("‚úÖ Target reached, skipping")

In [None]:
# Block 5: Steps 40k ‚Üí 50k (HALFWAY)
if not STATE['done']:
    print("üìä BLOCK 5: 40k ‚Üí 50k")
    # Train 10,000 more steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=5, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_50k")
    # Halfway report: best evaluation so far relative to the FT-90s reference.
    imp = ((STATE['callback'].best - FT90_REF) / abs(FT90_REF)) * 100
    print(f"üìà HALFWAY: {STATE['steps']} steps | Best improvement: {imp:+.1f}%")
else:
    print("‚úÖ Target reached, skipping")

In [None]:
# Block 6: Steps 50k ‚Üí 60k
if not STATE['done']:
    print("üìä BLOCK 6: 50k ‚Üí 60k")
    # Train 10,000 more steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=6, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_60k")
    print(f"‚úÖ Block 6 done | Steps: {STATE['steps']} | Best: {STATE['callback'].best:.1f}")
else:
    print("‚úÖ Target reached, skipping")

In [None]:
# Block 7: Steps 60k ‚Üí 70k
if not STATE['done']:
    print("üìä BLOCK 7: 60k ‚Üí 70k")
    # Train 10,000 more steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=7, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_70k")
    print(f"‚úÖ Block 7 done | Steps: {STATE['steps']} | Best: {STATE['callback'].best:.1f}")
else:
    print("‚úÖ Target reached, skipping")

In [None]:
# Block 8: Steps 70k ‚Üí 80k
if not STATE['done']:
    print("üìä BLOCK 8: 70k ‚Üí 80k")
    # Train 10,000 more steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=8, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_80k")
    print(f"‚úÖ Block 8 done | Steps: {STATE['steps']} | Best: {STATE['callback'].best:.1f}")
else:
    print("‚úÖ Target reached, skipping")

In [None]:
# Block 9: Steps 80k ‚Üí 90k
if not STATE['done']:
    print("üìä BLOCK 9: 80k ‚Üí 90k")
    # Train 10,000 more steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=9, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_90k")
    print(f"‚úÖ Block 9 done | Steps: {STATE['steps']} | Best: {STATE['callback'].best:.1f}")
else:
    print("‚úÖ Target reached, skipping")

In [None]:
# Block 10: Steps 90k ‚Üí 100k (FINAL)
if not STATE['done']:
    print("üìä BLOCK 10 (FINAL): 90k ‚Üí 100k")
    # Train the last 10,000 steps, continuing the existing timestep counter.
    STATE['model'].learn(
        10000,
        callback=STATE['callback'],
        reset_num_timesteps=False,
        progress_bar=True,
    )
    STATE.update(block=10, steps=STATE['callback'].num_timesteps)
    STATE['done'] = STATE['callback'].reached
    STATE['model'].save("/content/model_FINAL")
else:
    print("‚úÖ Target reached, skipping")

# Final summary, printed whether or not this block actually trained.
imp = 100 * (STATE['callback'].best - FT90_REF) / abs(FT90_REF)
print(f"\n{'='*60}")
print(f"üèÅ TRAINING COMPLETE")
print(f"   Total steps: {STATE['steps']}")
print(f"   Best reward: {STATE['callback'].best:.1f}")
print(f"   Improvement vs FT-90s: {imp:+.1f}%")
print(f"   Target (+10%): {'‚úÖ ACHIEVED' if STATE['done'] else '‚ùå NOT ACHIEVED'}")
print(f"{'='*60}")

In [None]:
# Final: Plot & Save Results
import matplotlib.pyplot as plt
import json

# Evaluation history recorded by the callback: one dict per evaluation.
h = STATE['callback'].history
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: mean evaluation reward vs. training steps, with baseline references.
ax1.plot([x['step'] for x in h], [x['reward'] for x in h], 'b-o', markersize=3)
ax1.axhline(y=FT90_REF, color='r', linestyle='--', label='FT-90s')
ax1.axhline(y=baseline_results['FT-30s']['mean_reward'], color='g', linestyle=':', label='FT-30s')
ax1.set_xlabel('Steps'); ax1.set_ylabel('Reward'); ax1.legend(); ax1.set_title('Learning Curve')

# Right panel: percentage improvement over FT-90s, with the +10% target line.
ax2.plot([x['step'] for x in h], [x['improvement'] for x in h], 'b-o', markersize=3)
ax2.axhline(y=10, color='g', linestyle='--', label='Target +10%')
ax2.axhline(y=0, color='r', linestyle='-', alpha=0.5)
ax2.set_xlabel('Steps'); ax2.set_ylabel('Improvement (%)'); ax2.legend(); ax2.set_title('Improvement vs FT-90s')

plt.tight_layout()
plt.savefig('/content/stage3_results.png', dpi=150)
plt.show()

# Save JSON
results = {'baselines': baseline_results, 'history': h, 'best': STATE['callback'].best, 
           'improvement': ((STATE['callback'].best - FT90_REF) / abs(FT90_REF)) * 100}
with open('/content/stage3_results.json', 'w') as f:
    # default=float converts NumPy scalars (e.g. np.float32 rewards) that the
    # json module cannot serialise on its own.
    json.dump(results, f, indent=2, default=float)
print("üìÅ Saved: stage3_results.png, stage3_results.json")