In [16]:
import sys
import random
import torch
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Dataset
import sumolib
import traci
from sumolib import checkBinary
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import sys
import io
from contextlib import redirect_stdout
import matplotlib.pyplot as plt
import pandas as pd
import os
import math
from collections import namedtuple, deque
import gym


if 'SUMO_HOME' in os.environ:
    print('SUMO_HOME found')
    sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))

sumoBinary = checkBinary('sumo-gui')
# sumoBinary = checkBinary('sumo')
roadNetwork = "./config/osm.sumocfg"
sumoCmd = [sumoBinary, "-c", roadNetwork, "--start", "--quit-on-end"]
# use gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device: " + str(device))

SUMO_HOME found
Using device: cuda


In [17]:
def intervehicleConnectivity(threshold = None):
    xs = []
    ys = []
    for vehicle in traci.vehicle.getIDList():
        x, y = traci.vehicle.getPosition(vehicle)
        xs.append(x)
        ys.append(y)
    xs = torch.tensor(xs, dtype=torch.float32).to(device).view(-1,1)
    ys = torch.tensor(ys, dtype=torch.float32).to(device).view(-1,1)
    intervehicle_distances = torch.sqrt((xs - xs.t())**2 + (ys - ys.t())**2)
    if threshold is not None:
        # make the distances 1 if less than the threshold, 0 otherwise
        intervehicle_distances = torch.where(intervehicle_distances < threshold, torch.ones_like(intervehicle_distances), torch.zeros_like(intervehicle_distances))
    return intervehicle_distances, xs, ys

In [25]:
def randomTrips(dur=1000, density=12):
    os.system("python $SUMO_HOME/tools/randomTrips.py -n config/osm.net.xml.gz -r config/osm.passenger.trips.xml -e " + str(dur) + " -l --insertion-density=" + str(density))

def shouldContinueSim():
    numVehicles = traci.simulation.getMinExpectedNumber()
    return True if numVehicles > 0 else False

def restart(sumoCmd):
    with io.StringIO() as buf, redirect_stdout(buf):
        try:
            traci.close()
        except:
            pass
        traci.start(sumoCmd)

def close():
    traci.close()

In [26]:
import copy

class Knowledges:
    def __init__(self):
        self.knowledges = {}
        self.delays = {}
    
    def add_observations(self, vehicles, observed_vehicles):
        for vehicle, visibility in zip(vehicles, observed_vehicles):
            if vehicle not in self.knowledges:
                self.knowledges[vehicle] = []
                self.delays[vehicle] = 0
            self.knowledges[vehicle].append(int(visibility))
            if visibility == 0:
                self.delays[vehicle] += 1
            else:
                self.delays[vehicle] = 0
    
    def merge_knowledges(self, new_knowledges, new_delays):
        prev_missing, prev_delay = self.evaluate_knowledge()
        for vehicle, visibility in new_knowledges.items():
            if vehicle not in self.knowledges:
                self.knowledges[vehicle] = copy.deepcopy(visibility)
                self.delays[vehicle] = new_delays[vehicle]
            else:
                for i in range(1, len(self.knowledges[vehicle])+1):
                    if i > len(visibility):
                        break
                    self.knowledges[vehicle][-i] = visibility[-i] | self.knowledges[vehicle][-i]
                self.delays[vehicle] = min(self.delays[vehicle], new_delays[vehicle])
        new_missing, new_delay = self.evaluate_knowledge()
        return copy.deepcopy(self.knowledges), copy.deepcopy(self.delays), prev_missing - new_missing, prev_delay - new_delay

    def get_knowledges(self):
        return copy.deepcopy(self.knowledges)
    
    def get_delays(self):
        return copy.deepcopy(self.delays)
    
    def evaluate_knowledge(self):
        observed = 0
        delay = 0
        num_vehicles = len(self.knowledges)
        for vehicle, visibility in self.knowledges.items():
            observed += sum(visibility)
            delay += self.delays[vehicle]
        return 1-(observed / num_vehicles), delay / num_vehicles

class Beacon:
    def __init__(self, trace_hidden):
        self.trace_hidden = copy.deepcopy(trace_hidden)
    
    def update(self, trace_hidden):
        self.trace_hidden = copy.deepcopy(trace_hidden)


In [27]:
Transition = namedtuple('Transition',('state', 'next_state', 'reward'))

class GRU_RL(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(GRU_RL, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x, h=None):
        out, h = self.gru(x, h)
        out = self.fc(out[:, -1, :])
        out = self.relu(out)
        out = self.fc(out)
        out = self.softmax(out)
        return out, h
    
    def init_hidden(self, batch_size):
        return torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)


class ReplayMemory(object):

    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        """Save a transition"""
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

## Get Max Action Space

In [28]:
# find the maximum action space
randomTrips(1000, 1.5)
restart(sumoCmd)
max_action_space = 0
total_actions = 0

total_missing_gain = 0
total_delay_gain = 0
step = 0
max_action_space = 0


while shouldContinueSim():
    step += 1
    if step > 1100:
        close()
        break
    traci.simulationStep()
    ids = traci.vehicle.getIDList()
    connectivity, xs, ys = intervehicleConnectivity(800)
    # minus the diagonal
    action_spaces = connectivity.to("cpu") - torch.eye(connectivity.size(0))
    for i, vehicle in enumerate(ids):
        # get non-zero indices except the diagonal
        non_zero_indices = np.where(action_spaces[i] == 1)[0]
        if len(non_zero_indices) > max_action_space:
            max_action_space = len(non_zero_indices)
print("Max action space: ", max_action_space)

Success.
Max action space:  15


In [None]:
class SumoGym(gym.Env):
    def __init__(self, sumoCmd, max_action_space, max_steps=1100):
        self.sumoCmd = sumoCmd
        self.max_action_space = max_action_space
        self.max_steps = max_steps
        self.step_counter = 0
        self.vehicle_knowledges = {}
        self.vehicle_ids = None
        

    def act(self, vehicle, selected_index):
        selected_index = random.choice(non_zero_indices)
        receiver = ids[selected_index]
        _, _, missing_gain, delay_gain = self.vehicle_knowledges[receiver].merge_knowledges(self.vehicle_knowledges[vehicle].get_knowledges(), self.vehicle_knowledges[vehicle].get_delays())
        reward = self.getRewards(missing_gain, delay_gain)
        return reward
    
    def step(self):
        if self.getDoneState():
            traci.close()
            return True
        else:
            traci.simulationStep()
            self.vehicle_ids = traci.vehicle.getIDList()
            connectivity, xs, ys = intervehicleConnectivity(800)
            # minus the diagonal
            action_spaces = connectivity.to("cpu") - torch.eye(connectivity.size(0))
            
            for i, vehicle in enumerate(ids):
                if vehicle not in self.vehicle_knowledges:
                    self.vehicle_knowledges[vehicle] = Knowledges()
                vehicle_knowledges[vehicle].add_observations(ids, connectivity[i])
            return False
        


    def render(self):
        self.show_gui = True
        pass


    def reset(self):
        try:
            traci.close()
        except:
            pass
        traci.start(sumoCmd)

        while not self.getCurrentStates():
            traci.simulationStep()
            self.step_counter += 1
            self.vehicle_ids = traci.vehicle.getIDList()
            if self.vehicle_ids:
                break

        return self.getCurrentStates()


    def getCurrentStates(self):
        """
        function: Get all the states of vehicles, observation space.
        """
        states = []
        for vehicle in self.vehicle_ids:
            state = []
            for observed_vehicle in self.vehicle_ids:
                if vehicle == observed_vehicle:
                    state.append(0)
                else:
                    state.append(1)
            states.append(state)
        return states


    def getRewards(self, missing_reduction, delay_reduction, missing_coefficient=2, delay_coefficient=3):
        return missing_reduction * missing_coefficient + delay_reduction * delay_coefficient
        

    def getDoneState(self):
        """
        function: get the done state of simulation.
        """
        return not (shouldContinueSim() and self.step_counter <= self.max_steps)

In [None]:
# BATCH_SIZE is the number of transitions sampled from the replay buffer
# GAMMA is the discount factor as mentioned in the previous section
# EPS_START is the starting value of epsilon
# EPS_END is the final value of epsilon
# EPS_DECAY controls the rate of exponential decay of epsilon, higher means a slower decay
# TAU is the update rate of the target network
# LR is the learning rate of the ``AdamW`` optimizer
BATCH_SIZE = 128
GAMMA = 0.99
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 1000
TAU = 0.005
LR = 1e-4

# Get number of actions from gym action space
n_actions = max_action_space
# Get the number of state observations
state, info = env.reset()
n_observations = len(state)

policy_net = DQN(n_observations, n_actions).to(device)
target_net = DQN(n_observations, n_actions).to(device)
target_net.load_state_dict(policy_net.state_dict())

optimizer = optim.AdamW(policy_net.parameters(), lr=LR, amsgrad=True)
memory = ReplayMemory(10000)


steps_done = 0


def select_action(state):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if sample > eps_threshold:
        with torch.no_grad():
            # t.max(1) will return the largest column value of each row.
            # second column on max result is index of where max element was
            # found, so we pick action with the larger expected reward.
            return policy_net(state).max(1).indices.view(1, 1)
    else:
        return torch.tensor([[env.action_space.sample()]], device=device, dtype=torch.long)


episode_durations = []

In [13]:
# find the maximum action space
restart(800, 1.5)
max_action_space = 0
vehicle_knowledges = {}
total_actions = 0

total_missing_gain = 0
total_delay_gain = 0
# total_large_delay = 0
step = 0
max_action_space = 0


while shouldContinueSim():
    step += 1
    if step > 1100:
        close()
        break
    traci.simulationStep()
    ids = traci.vehicle.getIDList()
    connectivity, xs, ys = intervehicleConnectivity(800)
    # minus the diagonal
    action_spaces = connectivity.to("cpu") - torch.eye(connectivity.size(0))
    
    for i, vehicle in enumerate(ids):
        if vehicle not in vehicle_knowledges:
            vehicle_knowledges[vehicle] = Knowledges()
        vehicle_knowledges[vehicle].add_observations(ids, connectivity[i])
    for i, vehicle in enumerate(ids):
        # get non-zero indices except the diagonal
        non_zero_indices = np.where(action_spaces[i] == 1)[0]
        # for index in non_zero_indices:
        #     receiver = ids[index]
        #     assert vehicle in vehicle_knowledges and receiver in vehicle_knowledges
        #     vehicle_knowledges[receiver].merge_knowledges(vehicle_knowledges[vehicle].get_knowledges(), vehicle_knowledges[vehicle].get_delays())
        # if len(non_zero_indices) > 0:
        #     selected_index = random.choice(non_zero_indices)
        #     receiver = ids[selected_index]
        #     assert vehicle in vehicle_knowledges and receiver in vehicle_knowledges
        #     _, _, missing_gain, delay_gain = vehicle_knowledges[receiver].merge_knowledges(vehicle_knowledges[vehicle].get_knowledges(), vehicle_knowledges[vehicle].get_delays())
        #     total_missing_gain += missing_gain
        #     total_delay_gain += delay_gain
        #     total_actions += 1
        if len(non_zero_indices) > max_action_space:
            max_action_space = len(non_zero_indices)
print("Max action space: ", max_action_space)
#     for i, vehicle in enumerate(ids):
#         total_states += 1
#         missing, delay, large_delay = vehicle_knowledges[vehicle].evaluate_knowledge()
#         total_missing += missing
#         total_delay += delay
#         total_large_delay += large_delay
# print("Average missing: ", total_missing / total_states)
# print("Average delay: ", total_delay / total_states)
# print("Average large delay: ", total_large_delay / total_states)

# print("Average missing gain: ", total_missing_gain / total_actions)
# print("Average delay gain: ", total_delay_gain / total_actions)

Success.
Max action space:  15


In [7]:
num_vehicles = len(vehicle_knowledges)
total_missing = 0
total_delay = 0
step = 0
max_delay = 0 
for vehicle, knowledge in vehicle_knowledges.items():
    m, d, l = knowledge.evaluate_knowledge() 
    total_missing += m
    total_delay += d
    max_delay = max(max_delay, l)
print("Average missing: ", total_missing/num_vehicles)
print("Average delay: ", total_delay/num_vehicles)
print("Max delay: ", max_delay)

Average missing:  0.035199839455880545
Average delay:  1.4695487863602796
Max delay:  0.5606060606060606
