In [None]:
from flow.core.params import VehicleParams, InFlows, SumoCarFollowingParams, SumoParams, EnvParams, InitialConfig, \
    NetParams, SumoLaneChangeParams, TrafficLightParams
from flow.controllers import IDMController, RLController
from controller import SpecificMergeRouter
from network import HighwayRampsNetwork, ADDITIONAL_NET_PARAMS
import numpy as np
import sys


# ----------- Configurations -----------#
# Run-mode switches. TRAINING, TESTING and DEBUG are forwarded to
# Experiment.run(); RENDER is forwarded to SumoParams(render=...).
# Flip a value here to change the mode of the run.
TRAINING = True
TESTING = True
DEBUG = False
RENDER = False

# Human-driven vehicle counts. NUM_HUMAN is the figure reported to the network
# parameters and to the experiment, while actual_num_human is the number that
# the probabilistic human inflows split between them.
NUM_HUMAN = 20
actual_num_human = 10

# Number of RL-controlled vehicles injected for each merge type.
NUM_MERGE_0 = 10
NUM_MERGE_1 = 10

# Speed caps used as `max_speed` for the RL-controlled (AV) and the
# human-driven (HV) vehicle types respectively.
MAX_AV_SPEED = 14
MAX_HV_SPEED = 10

# Display colours: index 0 is used for "merge_0", index 1 for "merge_1".
VEH_COLORS = ['blue', 'red']

#######################################################


# Every vehicle type below is routed by the same project-local router class.
Router = SpecificMergeRouter

vehicles = VehicleParams()

# Human-driven type: IDM acceleration controller, 'right_of_way' speed mode,
# a 5 m minimum gap and a speed cap of MAX_HV_SPEED.
vehicles.add(
    veh_id="human",
    lane_change_params=SumoLaneChangeParams('only_strategic_safe'),
    car_following_params=SumoCarFollowingParams(
        speed_mode='right_of_way',
        min_gap=5,
        tau=1,
        max_speed=MAX_HV_SPEED,
    ),
    acceleration_controller=(IDMController, {}),
    routing_controller=(Router, {}),
)

# RL-controlled merge types. "merge_0" and "merge_1" are configured identically
# and differ only in their id and display colour (VEH_COLORS[0] / VEH_COLORS[1]).
# Fresh parameter objects are built on every iteration so that the two types
# never share a params instance.
for _cav_index, _cav_id in enumerate(("merge_0", "merge_1")):
    vehicles.add(
        veh_id=_cav_id,
        lane_change_params=SumoLaneChangeParams('no_cooperative_safe'),
        car_following_params=SumoCarFollowingParams(
            speed_mode='no_collide',
            min_gap=1,
            tau=1,
            max_speed=MAX_AV_SPEED,
        ),
        acceleration_controller=(RLController, {}),
        routing_controller=(Router, {}),
        color=VEH_COLORS[_cav_index],
    )

initial_config = InitialConfig(spacing='uniform')

inflow = InFlows()

# --------------------------------------------------------------------------
# Extra attributes for the single "obstacle" vehicle, passed through to
# InFlows.add() as keyword arguments.
# NOTE: np.random.randint's upper bound is exclusive, so randint(100, 101)
# always yields 100; the call is kept so the depart position can be turned
# into a real random range by widening the bounds.
obstacle = dict(
    departPos=np.random.randint(100, 101),
    arrivalPos=200,
    arrivalEdge=0,
    speedFactor=0.00001,
    color="red",
)

# One human-type vehicle (number=1) on lane 1 of "highway_0", departing with
# zero speed; together with the near-zero speedFactor above this presumably
# makes it behave as a quasi-stationary obstacle -- confirm against the env.
inflow.add(
    veh_type="human",
    edge="highway_0",
    period=100000,
    depart_lane=1,
    depart_speed=0,
    route='routehighway_0_0',
    number=1,
    **obstacle,
)

# --------------------------------------------------------------------------
# Regular human traffic: half of actual_num_human on lane 0 and half on
# lane 2 of "highway_0" (lane 1 is the lane the obstacle vehicle departs on).
for _human_lane in (0, 2):
    inflow.add(
        veh_type="human",
        edge="highway_0",
        probability=0.1,
        depart_lane=_human_lane,
        depart_speed='random',
        route='routehighway_0_0',
        number=int(actual_num_human/2),
    )

# RL-controlled traffic: one inflow per merge type, entering "highway_0" on a
# random lane with a random departure speed.
for _cav_type, _cav_count in (("merge_0", NUM_MERGE_0), ("merge_1", NUM_MERGE_1)):
    inflow.add(
        veh_type=_cav_type,
        edge="highway_0",
        probability=0.1,
        depart_lane='random',
        depart_speed='random',
        route='routehighway_0_0',
        number=_cav_count,
    )

# Simulator settings: 0.1 s simulation step, SUMO instance restarted on reset,
# rendering controlled by the RENDER switch, no frames saved.
sim_params = SumoParams(
    sim_step=0.1,
    restart_instance=True,
    render=RENDER,
    save_render=False,
)

from specific_environment import MergeEnv

# Integer code assigned to each vehicle type, handed to the environment under
# the "intention" key.
intention_dic = {
    "human": 0,
    "merge_0": 1,
    "merge_1": 2,
}

# Edges stored in the network parameters as 'terminal_edges'.
terminal_edges = [
    'off_ramp_0',
    'off_ramp_1',
    'highway_2',
]

env_params = EnvParams(
    warmup_steps=100,
    additional_params={
        "intention": intention_dic,
        "max_av_speed": MAX_AV_SPEED,
        "max_hv_speed": MAX_HV_SPEED,
    },
)

# Start from a copy of the network defaults so the imported
# ADDITIONAL_NET_PARAMS object itself is left untouched, then record the
# vehicle counts and terminal edges for this scenario.
additional_net_params = ADDITIONAL_NET_PARAMS.copy()
additional_net_params.update({
    'num_vehicles': NUM_HUMAN + NUM_MERGE_0 + NUM_MERGE_1,
    'num_cav': NUM_MERGE_0 + NUM_MERGE_1,
    'num_hv': NUM_HUMAN,
    'terminal_edges': terminal_edges,
})

net_params = NetParams(
    inflows=inflow,
    additional_params=additional_net_params,
)

traffic_lights = TrafficLightParams()

network = HighwayRampsNetwork(
    "highway_ramp",
    vehicles,
    net_params,
    initial_config,
    traffic_lights,
)


# ----------- Model Building -----------#
# Bundle every component built above into the single dictionary that the
# Experiment class receives.
flow_params = {
    'exp_tag': 'test_network',
    'env_name': MergeEnv,
    'network': network,
    'simulator': 'traci',
    'sim': sim_params,
    'env': env_params,
    'net': net_params,
    'veh': vehicles,
    'initial': initial_config,
    'tls': traffic_lights,
}

# Number of time steps, set on the EnvParams object stored under 'env'.
flow_params['env'].horizon = 2500


from Experiment.DuelingDoubleDQN_experiments import Experiment

exp = Experiment(flow_params)


# Run the SUMO simulation with the mode switches chosen in the configuration
# section; num_cav is the combined count of both RL merge types.
exp.run(
    num_runs=1,
    training=TRAINING,
    testing=TESTING,
    num_human=NUM_HUMAN,
    actual_num_human=actual_num_human,
    num_cav=(NUM_MERGE_0 + NUM_MERGE_1),
    model='GRL',
    debug=DEBUG,
)


  from .autonotebook import tqdm as notebook_tqdm


The model will be saved at: GRL_Trained_Models/DD_DQN/DD_DQN_2
satisfied:  flow_4.3
satisfied:  flow_3.7
satisfied:  flow_3.8
satisfied:  flow_4.8
satisfied:  flow_4.5
Training Episode: 1 Reward: -3090.2916485447035
Statistics: [('average_q', nan), ('average_loss', nan), ('cumulative_steps', 2500), ('n_updates', 0), ('rlen', 2500)]
satisfied:  flow_3.1
satisfied:  flow_4.1
satisfied:  flow_3.6
satisfied:  flow_4.7
satisfied:  flow_4.6
satisfied:  flow_4.8
satisfied:  flow_3.8
Training Episode: 2 Reward: -1756.989819453279
Statistics: [('average_q', nan), ('average_loss', nan), ('cumulative_steps', 5000), ('n_updates', 0), ('rlen', 5000)]
satisfied:  flow_4.2
satisfied:  flow_3.6
satisfied:  flow_4.4
satisfied:  flow_3.7
satisfied:  flow_4.9
Training Episode: 3 Reward: -2592.646572076917
Statistics: [('average_q', nan), ('average_loss', nan), ('cumulative_steps', 7500), ('n_updates', 0), ('rlen', 7500)]
satisfied:  flow_3.1
satisfied:  flow_4.5
satisfied:  flow_3.2
satisfied:  flow_4.7


satisfied:  flow_3.1
satisfied:  flow_3.0
satisfied:  flow_3.2
satisfied:  flow_3.3
satisfied:  flow_3.4
satisfied:  flow_3.5
satisfied:  flow_4.0
satisfied:  flow_3.6
satisfied:  flow_4.1
satisfied:  flow_4.2
satisfied:  flow_3.7
satisfied:  flow_3.8
satisfied:  flow_4.3
satisfied:  flow_3.9
satisfied:  flow_4.4
satisfied:  flow_4.5
satisfied:  flow_4.6
satisfied:  flow_4.7
satisfied:  flow_4.8
satisfied:  flow_4.9
Counter({'num_full_filled': 20})
done in:  2166
Training Episode: 18 Reward: 5995.172945659009
Statistics: [('average_q', 0.98312426), ('average_loss', 0.7696903255581856), ('cumulative_steps', 34408), ('n_updates', 1441), ('rlen', 34408)]
satisfied:  flow_4.0
satisfied:  flow_3.0
satisfied:  flow_3.1
satisfied:  flow_3.2
satisfied:  flow_3.3
satisfied:  flow_4.1
satisfied:  flow_4.2
satisfied:  flow_4.3
satisfied:  flow_4.4
satisfied:  flow_4.5
satisfied:  flow_3.4
satisfied:  flow_3.5
satisfied:  flow_3.6
satisfied:  flow_4.6
satisfied:  flow_3.7
satisfied:  flow_4.7
sati