In [119]:
import sys
import random
import torch
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Dataset
import sumolib
import traci
from sumolib import checkBinary
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import sys
import io
from contextlib import redirect_stdout
import matplotlib.pyplot as plt
import pandas as pd
import os
import math
from collections import namedtuple, deque
import gym

if 'SUMO_HOME' in os.environ:
    print('SUMO_HOME found')
    sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))

# sumoBinary = checkBinary('sumo-gui')
sumoBinary = checkBinary('sumo')
roadNetwork = "./config/osm.sumocfg"
sumoCmd = [sumoBinary, "-c", roadNetwork, "--start", "--quit-on-end"]
# use gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device: " + str(device))

SUMO_HOME found
Using device: cuda


In [86]:
def intervehicleConnectivity(threshold = None):
    xs = []
    ys = []
    for vehicle in traci.vehicle.getIDList():
        x, y = traci.vehicle.getPosition(vehicle)
        xs.append(x)
        ys.append(y)
    xs = torch.tensor(xs, dtype=torch.float32).to(device).view(-1,1)
    ys = torch.tensor(ys, dtype=torch.float32).to(device).view(-1,1)
    intervehicle_distances = torch.sqrt((xs - xs.t())**2 + (ys - ys.t())**2)
    if threshold is not None:
        # make the distances 1 if less than the threshold, 0 otherwise
        intervehicle_distances = torch.where(intervehicle_distances < threshold, torch.ones_like(intervehicle_distances), torch.zeros_like(intervehicle_distances))
    return intervehicle_distances, xs.to("cpu").squeeze(), ys.to("cpu").squeeze()

In [87]:
def randomTrips(dur=1000, density=12):
    os.system("python $SUMO_HOME/tools/randomTrips.py -n config/osm.net.xml.gz -r config/osm.passenger.trips.xml -e " + str(dur) + " -l --insertion-density=" + str(density))

def shouldContinueSim():
    numVehicles = traci.simulation.getMinExpectedNumber()
    return True if numVehicles > 0 else False

def restart(sumoCmd):
    with io.StringIO() as buf, redirect_stdout(buf):
        try:
            traci.close()
        except:
            pass
        traci.start(sumoCmd)

def close():
    traci.close()

In [88]:
import copy

class Knowledges:
    def __init__(self):
        self.knowledges = {}
        self.delays = {}
    
    def add_observations(self, vehicles, observed_vehicles):
        for vehicle, visibility in zip(vehicles, observed_vehicles):
            if vehicle not in self.knowledges:
                self.knowledges[vehicle] = []
                self.delays[vehicle] = 0
            self.knowledges[vehicle].append(int(visibility))
            if visibility == 0:
                self.delays[vehicle] += 1
            else:
                self.delays[vehicle] = 0
    
    def merge_knowledges(self, new_knowledges, new_delays):
        prev_missing, prev_delay = self.evaluate_knowledge()
        for vehicle, visibility in new_knowledges.items():
            if vehicle not in self.knowledges:
                self.knowledges[vehicle] = copy.deepcopy(visibility)
                self.delays[vehicle] = new_delays[vehicle]
            else:
                for i in range(1, len(self.knowledges[vehicle])+1):
                    if i > len(visibility):
                        break
                    self.knowledges[vehicle][-i] = visibility[-i] | self.knowledges[vehicle][-i]
                self.delays[vehicle] = min(self.delays[vehicle], new_delays[vehicle])
        new_missing, new_delay = self.evaluate_knowledge()
        return copy.deepcopy(self.knowledges), copy.deepcopy(self.delays), prev_missing - new_missing, prev_delay - new_delay

    def get_knowledges(self):
        return copy.deepcopy(self.knowledges)
    
    def get_delays(self):
        return copy.deepcopy(self.delays)
    
    def evaluate_knowledge(self):
        observed = 0
        delay = 0
        num_vehicles = len(self.knowledges)
        for vehicle, visibility in self.knowledges.items():
            observed += sum(visibility)
            delay += self.delays[vehicle]
        return 1-(observed / num_vehicles), delay / num_vehicles

class Beacon:
    def __init__(self, trace_hidden):
        self.trace_hidden = copy.deepcopy(trace_hidden)
    
    def update(self, trace_hidden):
        self.trace_hidden = copy.deepcopy(trace_hidden)


In [211]:
Transition = namedtuple('Transition',('state', 'action', 'reward'))

class GRU_RL(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(GRU_RL, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x, h=None):
        out, h = self.gru(x, h)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.softmax(out)
        return out, h
    
    def init_hidden(self, batch_size):
        return torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)


class ReplayMemory(object):

    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        """Save a transition"""
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

## Get Max Action Space

In [197]:
# find the maximum action space
randomTrips(1000, 1.5)
restart(sumoCmd)
max_action_space = 0
total_actions = 0

total_missing_gain = 0
total_delay_gain = 0
step = 0
max_action_space = 0


while shouldContinueSim():
    step += 1
    if step > 1100:
        close()
        break
    traci.simulationStep()
    ids = traci.vehicle.getIDList()
    connectivity, xs, ys = intervehicleConnectivity(800)
    # minus the diagonal
    action_spaces = connectivity.to("cpu") - torch.eye(connectivity.size(0))
    for i, vehicle in enumerate(ids):
        # get non-zero indices except the diagonal
        non_zero_indices = np.where(action_spaces[i] == 1)[0]
        if len(non_zero_indices) > max_action_space:
            max_action_space = len(non_zero_indices)
print("Max action space: ", max_action_space)

Success.
Simulation ended at time: 14.00
Reason: TraCI requested termination.
Performance: 
 Duration: 211.44s
 TraCI-Duration: 0.01s
 Real time factor: 0.0662136
 UPS: 0.089861
Vehicles: 
 Inserted: 2 (Loaded: 25)
 Running: 2
 Waiting: 0
Statistics (avg of 0):
 RouteLength: 0.00
 Speed: 0.00
 Duration: 0.00
 WaitingTime: 0.00
 TimeLoss: 0.00
 DepartDelay: 0.00

***Starting server on port 45355 ***
Loading net-file from './config/osm.net.xml.gz' ... done (104ms).
Loading done.
Simulation version 1.20.0 started with time: 0.00.
Simulation ended at time: 1100.00
Reason: TraCI requested termination.
Performance: 
 Duration: 1.78s
 TraCI-Duration: 1.56s
 Real time factor: 616.938
 UPS: 25339.315760
Vehicles: 
 Inserted: 111
 Running: 52
 Waiting: 0
Statistics (avg of 59):
 RouteLength: 4182.20
 Speed: 9.57
 Duration: 445.41
 WaitingTime: 15.56
 TimeLoss: 61.44
 DepartDelay: 0.48

Max action space:  15


In [183]:
class SumoGym(gym.Env):
    def __init__(self, sumoCmd, max_action_space, max_steps=1100):
        self.sumoCmd = sumoCmd
        self.max_action_space = max_action_space
        self.max_steps = max_steps
        self.step_counter = 0
        self.vehicle_knowledges = {}
        self.vehicle_ids = None
        self.action_spaces = None
        self.xs = None
        self.ys = None

    def act(self, vehicle, selected_index):
        print("Vehicle: ", vehicle, " selected index: ", selected_index)
        selected_index = random.choice(non_zero_indices)
        receiver = self.vehicle_ids[selected_index]
        _, _, missing_gain, delay_gain = self.vehicle_knowledges[receiver].merge_knowledges(self.vehicle_knowledges[vehicle].get_knowledges(), self.vehicle_knowledges[vehicle].get_delays())
        reward = self.getRewards(missing_gain, delay_gain)
        return reward
    
    def step(self):
        if self.getDoneState():
            print("Simulation is done.")
            traci.close()
            return True
        else:
            traci.simulationStep()
            self.step_counter += 1
            self.vehicle_ids = traci.vehicle.getIDList()
            connectivity, self.xs, self.ys = intervehicleConnectivity(800)
            # minus the diagonal
            self.action_spaces = connectivity.to("cpu") - torch.eye(connectivity.size(0))
            
            for i, vehicle in enumerate(self.vehicle_ids):
                if vehicle not in self.vehicle_knowledges:
                    self.vehicle_knowledges[vehicle] = Knowledges()
                self.vehicle_knowledges[vehicle].add_observations(self.vehicle_ids, connectivity[i])
            return False
        
    def render(self):
        self.show_gui = True

    def reset(self):
        try:
            traci.close()
        except:
            pass
        traci.start(sumoCmd)

        while not self.getDoneState():
            self.step()
            self.vehicle_ids = traci.vehicle.getIDList()
            if len(self.vehicle_ids)>1:
                break

        return self.getCurrentStates()

    def getCurrentStates(self):
        """
        function: Get all the states of vehicles, observation space.
        """
        states = []
        for i in range(len(self.action_spaces)):
            # pad both xs and ys to the max action space
            other_vehicles_xs = (self.xs * self.action_spaces[i])
            other_vehicles_ys = (self.ys * self.action_spaces[i])
            other_vehicles_xs = F.pad(other_vehicles_xs, (0, self.max_action_space - other_vehicles_xs.size(0)), "constant", 0)
            other_vehicles_ys = F.pad(other_vehicles_ys, (0, self.max_action_space - other_vehicles_ys.size(0)), "constant", 0)
            states.append(torch.cat((other_vehicles_xs, other_vehicles_ys)).view(-1))
        states = torch.stack(states)
        return states, self.vehicle_ids

        
    def getRewards(self, missing_reduction, delay_reduction, missing_coefficient=2, delay_coefficient=3):
        return missing_reduction * missing_coefficient + delay_reduction * delay_coefficient
        
    def getDoneState(self):
        """
        function: get the done state of simulation.
        """
        return not (shouldContinueSim() and self.step_counter <= self.max_steps)

In [213]:
# BATCH_SIZE is the number of transitions sampled from the replay buffer
# GAMMA is the discount factor as mentioned in the previous section
# EPS_START is the starting value of epsilon
# EPS_END is the final value of epsilon
# EPS_DECAY controls the rate of exponential decay of epsilon, higher means a slower decay
# TAU is the update rate of the target network
# LR is the learning rate of the ``AdamW`` optimizer
BATCH_SIZE = 128
GAMMA = 0.99
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 1000
TAU = 0.005
LR = 1e-4

# Get number of actions from gym action space
n_actions = max_action_space
# Get the number of state observations
env = SumoGym(sumoCmd, max_action_space)
state, v_ids = env.reset()
n_observations = max_action_space * 2

policy_net = GRU_RL(input_size = n_observations, hidden_size=128, output_size=n_actions, num_layers=5).to(device)
target_net = GRU_RL(input_size = n_observations, hidden_size=128, output_size=n_actions, num_layers=5).to(device)
target_net.load_state_dict(policy_net.state_dict())

optimizer = optim.AdamW(policy_net.parameters(), lr=LR, amsgrad=True)
memory = ReplayMemory(10000)


steps_done = 0


def select_action(state, hidden=None):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    with torch.no_grad():
        out, out_hidden = policy_net(state.unsqueeze(0), hidden)
        if sample > eps_threshold:
                return out.max(1).indices.view(1, 1), out_hidden
        else:
            return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long), out_hidden


Simulation ended at time: 12.00
Reason: TraCI requested termination.
Performance: 
 Duration: 5.64s
 TraCI-Duration: 0.01s
 Real time factor: 2.12615
 UPS: 2.657690
Vehicles: 
 Inserted: 2 (Loaded: 25)
 Running: 2
 Waiting: 0
Statistics (avg of 0):
 RouteLength: 0.00
 Speed: 0.00
 Duration: 0.00
 WaitingTime: 0.00
 TimeLoss: 0.00
 DepartDelay: 0.00

 Retrying in 1 seconds
***Starting server on port 51359 ***
Loading net-file from './config/osm.net.xml.gz' ... done (109ms).
Loading done.
Simulation version 1.20.0 started with time: 0.00.


In [214]:
def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    # (a final state would've been the one after which simulation ended)
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                          batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state
                                                if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken. These are the actions which would've been taken
    # for each batch state according to policy_net
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states.
    # Expected values of actions for non_final_next_states are computed based
    # on the "older" target_net; selecting their best reward with max(1).values
    # This is merged based on the mask, such that we'll have either the expected
    # state value or 0 in case the state was final.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    with torch.no_grad():
        next_state_values[non_final_mask] = target_net(non_final_next_states).max(1).values
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Compute Huber loss
    criterion = nn.SmoothL1Loss()
    loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    # In-place gradient clipping
    torch.nn.utils.clip_grad_value_(policy_net.parameters(), 100)
    optimizer.step()

In [221]:
num_episodes = 1

for i_episode in range(num_episodes):
    # Initialize the environment and get its state
    states, v_ids = env.reset()
    states = states.to(device)
    done = False
    hiddens = {}
    while not done:
        done = env.step()
        states, v_ids = env.getCurrentStates()
        for i in range(states.size(0)):
            curr_v = v_ids[i]
            state = states[i].to(device)
            if curr_v not in hiddens:
                action, v_hidden = select_action(state)
            else:
                action, v_hidden = select_action(state, hiddens[curr_v])
            hiddens[curr_v] = v_hidden
            if action >= len(env.action_spaces[i]) or env.action_spaces[i][action.item()] == 0:
                reward = torch.tensor([0], device=device)
            else:
                reward = env.act(v_ids[i], action)
                reward = torch.tensor([reward], device=device)
            memory.push(state, action, reward)

            # Perform one step of the optimization (on the policy network)
            optimize_model()

            # Soft update of the target network's weights
            # θ′ ← τ θ + (1 −τ )θ′
            target_net_state_dict = target_net.state_dict()
            policy_net_state_dict = policy_net.state_dict()
            for key in policy_net_state_dict:
                target_net_state_dict[key] = policy_net_state_dict[key]*TAU + target_net_state_dict[key]*(1-TAU)
            target_net.load_state_dict(target_net_state_dict)

Simulation ended at time: 21.00
Reason: TraCI requested termination.
Performance: 
 Duration: 19.19s
 TraCI-Duration: 0.04s
 Real time factor: 1.09455
 UPS: 1.876368
Vehicles: 
 Inserted: 3 (Loaded: 25)
 Running: 3
 Waiting: 0
Statistics (avg of 0):
 RouteLength: 0.00
 Speed: 0.00
 Duration: 0.00
 WaitingTime: 0.00
 TimeLoss: 0.00
 DepartDelay: 0.00

 Retrying in 1 seconds
***Starting server on port 39463 ***
Loading net-file from './config/osm.net.xml.gz' ... done (105ms).
Loading done.
Simulation version 1.20.0 started with time: 0.00.
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
2
0
1
2
0
1
2
0
1
2
0
1
2
0
1
2
0
1
2
0
1
2
0
1
2
0
1
2
3
0
1
2
3
0
1
2
3
0
1
2
3
0
1
2
3
0
1
2


AttributeError: 'Transition' object has no attribute 'next_state'