In [None]:
from sumo_deeprl import SumoDeepRl
import os
import traci
from stable_baselines3.dqn.dqn import DQN

### Initialize the SumoDeepRl class

In [None]:
# Experiment settings for this section; later cells read these names.
junction = "simple_2_intersection"  # scenario name handed to SumoDeepRl
reward = "diff-waiting-time"        # reward key; also a component of the model output path
sim_time = "0.5hour"                # simulation-length label; also a component of the model output path
# Traffic volumes to analyse (presumably vehicles per hour — confirm).
vphs = [
    2000,
    2500,
    3000,
    3500,
    4000,
]

In [None]:
# Helper object for the scenario named by `junction`; later cells call its methods
# (create_routes, analyze_fixed, create_environment, predict, analyze_predict).
sumo = SumoDeepRl(junction_name=junction)

### Generate Trips (optional)

In [None]:
# Route specification keyed by approach ('n', 's', 'e', 'w').
# NOTE(review): each value looks like [edge id, lane count or weight] — confirm
# against SumoDeepRl.create_routes.
route_details = {
    'n': ['-E1', 2],
    's': ['-E3', 3],
    'e': ['E0', 4],
    'w': ['-E2', 3],
}

# Create routes for every volume in the shared `vphs` list rather than a second
# hard-coded copy of it, so the generated volumes cannot drift from the ones the
# analysis cells are asked to evaluate.
# time=1800 presumably is seconds, i.e. the 0.5 hour of `sim_time` — confirm.
for cars in vphs:
    sumo.create_routes(route_details=route_details, time=1800, total_cars=cars)

### Simulation

In [None]:
# Run the fixed analysis (presumably the fixed-timing baseline — confirm) for
# every volume in `vphs`, without the SUMO GUI.
# For a single run instead, use e.g.:
#   sumo.simulation(sim_time=sim_time, route_file='3000.rou.xml', useGui=False)
sumo.analyze_fixed(
    sim_time=sim_time,
    vphs=vphs,
    useGui=False,
)

In [None]:
# Shut down the TraCI connection to SUMO.
# NOTE(review): assumes a connection is still open at this point — confirm that
# analyze_fixed does not already close it.
traci.close()

### Model

In [None]:
"""
reward_fns = {
  "diff-waiting-time": _diff_waiting_time_reward, # sum waiting time(t) - sum waiting time(t+1)
  "average-speed": _average_speed_reward, # average speed of every car
  "queue": _queue_reward, # -1 * total number of halting cars
  "pressure": _pressure_reward, # Returns the pressure (#veh leaving - #veh approaching) of the intersection.
}
"""

# Vehicle count of the training scenario; it names both the route file used
# here and the saved model file.
train_num_veh = '3000'

# Training environment for the agent, using the reward selected in `reward`.
env = sumo.create_environment(
    sim_time=sim_time,
    route_file=f'{train_num_veh}.rou.xml',
    num_seconds=2000,
    yellow_time=5,
    reward_fn=reward,
)

In [None]:
# DQN agent with an MLP policy, trained on `env`.
model = DQN(
    policy="MlpPolicy",
    env=env,
    # Start learning immediately (no warm-up steps) and train on every step.
    learning_starts=0,
    train_freq=1,
    # Exploration rate schedule: initial value 0.05, final value 0.01.
    exploration_initial_eps=0.05,
    exploration_final_eps=0.01,
    verbose=1,
)

In [None]:
total_timesteps = 10000
# reset_num_timesteps=False keeps the model's existing step counter, so
# re-running this cell continues training instead of restarting the count.
model.learn(total_timesteps=total_timesteps, reset_num_timesteps=False)

# Models are stored as <junction>/<sim_time>/model/<reward>/<train_num_veh>.
out_dir = f"{junction}/{sim_time}/model/{reward}"
# exist_ok=True replaces the separate existence check: it is a single call and
# cannot fail if the directory appears between the check and the creation.
os.makedirs(out_dir, exist_ok=True)
fullname = os.path.join(out_dir, train_num_veh)
model.save(fullname)
env.close()

### Results from the trained model

In [None]:
# Reload the saved agent from the model directory and attach it to `env`.
train_model = DQN.load(
    f'{junction}/{sim_time}/model/{reward}/{train_num_veh}.zip',
    env=env,
)

In [None]:
# Run the loaded model once on '3000.rou.xml' with the SUMO GUI enabled ...
sumo.predict(
    sim_time=sim_time,
    route_file='3000.rou.xml',
    model=train_model,
    useGui=True,
    yellow_time=5,
)
# ... then run the prediction analysis for the volumes in `vphs`.
sumo.analyze_predict(
    model=train_model,
    sim_time=sim_time,
    vphs=vphs,
    trained_number_veh=train_num_veh,
    reward=reward,
    yellow_time=5,
)

## Saint Paul

In [None]:
# Experiment settings for the Saint Paul section; these rebind the names used
# by the previous section.
junction = "saint_paul"        # scenario name handed to SumoDeepRl
reward = "diff-waiting-time"   # reward key; also a component of the model path
sim_time = "1.0hour"           # simulation-length label passed to the sumo helpers
# Traffic volumes to analyse (presumably vehicles per hour — confirm).
vphs = [
    1000,
    2000,
    3000,
    4000,
    5000,
]

In [None]:
# Rebind `sumo` to a helper for the scenario now named by `junction`
# ('saint_paul' in this section).
sumo = SumoDeepRl(junction_name=junction)

In [None]:
# Fixed analysis for this scenario. Uses the section's `sim_time` and `vphs`
# instead of repeating their literal values, so the settings live in one place.
sumo.analyze_fixed(sim_time=sim_time, vphs=vphs, useGui=False)

In [None]:
# Environment for this scenario, built on '3000.rou.xml' with the reward
# selected in `reward`.
env = sumo.create_environment(
    sim_time=sim_time,
    route_file='3000.rou.xml',
    num_seconds=15000,
    yellow_time=5,
    reward_fn=reward,
)

In [None]:
# Load the saved model for this scenario. The path is built from `sim_time`
# rather than a repeated '1.0hour' literal, so changing the section's setting
# also changes where the model is looked up.
train_model = DQN.load(f'{junction}/{sim_time}/model/{reward}/3000.zip', env=env)

In [None]:
# Run the loaded model once on '3000.rou.xml' with the SUMO GUI enabled.
sumo.predict(
    sim_time=sim_time,
    route_file='3000.rou.xml',
    model=train_model,
    useGui=True,
    yellow_time=5,
)

In [None]:
# Shut down the TraCI connection to SUMO before the next analysis cell.
# NOTE(review): assumes predict leaves a connection open — confirm.
traci.close()

In [None]:
# Run the prediction analysis with the loaded model for the volumes in `vphs`.
sumo.analyze_predict(
    model=train_model,
    sim_time=sim_time,
    vphs=vphs,
    trained_number_veh='3000',
    reward=reward,
    yellow_time=5,
)

In [None]:
# Shut down the TraCI connection to SUMO at the end of the section.
# NOTE(review): assumes analyze_predict leaves a connection open — confirm.
traci.close()