In [140]:
import sys
import random
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Dataset
import sumolib
import traci
from sumolib import checkBinary
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import sys
import io
from contextlib import redirect_stdout
import matplotlib.pyplot as plt
import pandas as pd
import os
import math
from collections import namedtuple, deque
import gym
from torch_geometric.utils import dense_to_sparse
import copy
from itertools import count

if 'SUMO_HOME' in os.environ:
    print('SUMO_HOME found')
    sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))

# sumoBinary = checkBinary('sumo-gui')
sumoBinary = checkBinary('sumo')
roadNetwork = "./config/osm.sumocfg"
sumoCmd = [sumoBinary, "-c", roadNetwork, "--start", "--quit-on-end"]
# use gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device: " + str(device))

SUMO_HOME found
Using device: cuda


In [141]:
def intervehicleConnectivity(threshold = None):
    xs = []
    ys = []
    for vehicle in traci.vehicle.getIDList():
        x, y = traci.vehicle.getPosition(vehicle)
        xs.append(x)
        ys.append(y)
    xs = torch.tensor(xs, dtype=torch.float32).view(-1,1)
    ys = torch.tensor(ys, dtype=torch.float32).view(-1,1)
    intervehicle_distances = torch.sqrt((xs - xs.t())**2 + (ys - ys.t())**2)
    if threshold is not None:
        # make the distances 1 if less than the threshold, 0 otherwise
        connectivity = torch.where(intervehicle_distances < threshold, torch.ones_like(intervehicle_distances), torch.zeros_like(intervehicle_distances))
    return connectivity, xs, ys, intervehicle_distances

In [142]:
def randomTrips(dur=1000, density=12):
    os.system("python $SUMO_HOME/tools/randomTrips.py -n config/osm.net.xml.gz -r config/osm.passenger.trips.xml -e " + str(dur) + " -l --insertion-density=" + str(density))

def shouldContinueSim():
    numVehicles = traci.simulation.getMinExpectedNumber()
    return True if numVehicles > 0 else False

def restart(sumoCmd):
    with io.StringIO() as buf, redirect_stdout(buf):
        try:
            traci.close()
        except:
            pass
        traci.start(sumoCmd)

def close():
    traci.close()

randomTrips(800, 1.5)

Success.


In [143]:
def bfs_distance(adj_matrix):
    n_hop_matrix = torch.zeros_like(adj_matrix)
    for start_node in range(adj_matrix.size(0)):
        visited = [0] * adj_matrix.size(0)
        queue = deque([(start_node, 0)])
        visited[start_node] = 0
        
        while queue:
            current_node, current_dist = queue.popleft()
            
            for neighbor, connected in enumerate(adj_matrix[current_node]):
                if connected and not visited[neighbor]:
                    queue.append((neighbor, current_dist + 1))
                    visited[neighbor] = True
                    n_hop_matrix[start_node, neighbor] = current_dist + 1
    
    return n_hop_matrix

In [301]:
class RoutingGym(gym.Env):
    def __init__(self, sumoCmd, max_steps=1100, n_nodes=57):
        self.sumoCmd = sumoCmd
        self.step_counter = 0
        self.max_steps = max_steps
        self.n_nodes = n_nodes
        self.vehicle_ids = None
        self.start_node = None
        self.end_node = None
        self.start_x = None
        self.start_y = None
        self.end_x = None
        self.end_y = None
        self.xs = None
        self.ys = None
        self.visited = torch.zeros(n_nodes)
        self.distance_to_end = None
        self.current_node = None
        self.node_features = None
        self.adj_matrix = None
        self.edge_index = None
        self.hop_thresh = None
        self.norm_x = None
        self.norm_y = None
        self.norm_xy = None
        self.routing_done = False
    
    def render(self):
        self.show_gui = True

    def reset(self):
        try:
            traci.close()
        except:
            pass
        traci.start(sumoCmd)
        self.step_counter = 0

        while self.step_counter < 400:
            traci.simulationStep()
            self.step_counter += 1

    def step(self):
        traci.simulationStep()
        self.routing_done = False
        self.step_counter += 1
        self.vehicle_ids = traci.vehicle.getIDList()
        self.adj_matrix, self.xs, self.ys, inter_vehicle_distance = intervehicleConnectivity(800)
        self.edge_index, _ = dense_to_sparse(self.adj_matrix)
        self.select_start_end_nodes()
        self.distance_to_end = inter_vehicle_distance[self.end_node]
        self.distance_to_end = F.pad(self.distance_to_end, (0, 57 - self.distance_to_end.size(0)), "constant", 0)
        self.visited = torch.zeros_like(self.visited)
        current_node_indicators = torch.zeros_like(self.visited)
        current_node_indicators[self.current_node] = 1
        self.node_features = torch.stack((self.xs, self.ys, self.visited, current_node_indicators, self.distance_to_end)).T
        
        return self.node_features.to(device)

    def select_start_end_nodes(self):
        n_hop_matrix = bfs_distance(self.adj_matrix)
        self.hop_thresh = min(n_hop_matrix.max(), 5)
        starts, ends = torch.where(n_hop_matrix >= self.hop_thresh)
        starts = starts.tolist()
        ends = ends.tolist()
        self.start_node, self.end_node = random.choice(list(zip(starts, ends)))
        self.current_node = self.start_node
        self.start_x = self.xs[self.start_node].item()
        self.start_y = self.ys[self.start_node].item()
        self.end_x = self.xs[self.end_node].item()
        self.end_y = self.ys[self.end_node].item()
        self.norm_x = self.end_x - self.start_x
        self.norm_y = self.end_y - self.start_y
        self.norm_xy = np.sqrt(self.norm_x**2 + self.norm_y**2)

        self.xs = self.xs.squeeze() / self.norm_x
        self.ys = self.ys.squeeze() / self.norm_y
        # padding with zeros
        self.xs = F.pad(self.xs, (0, 57 - self.xs.size(0)), "constant", 0)
        self.ys = F.pad(self.ys, (0, 57 - self.ys.size(0)), "constant", 0)


    def act(self, next_hop):
        # check if the next hop is reachable
        if self.adj_matrix[self.current_node, next_hop] == 1:
            self.visited[self.current_node] = 1
            self.current_node = next_hop
            curr_node_indicators = torch.zeros_like(self.visited)
            curr_node_indicators[self.current_node] = 1
            self.node_features = torch.stack((self.xs, self.ys, copy.deepcopy(self.visited), curr_node_indicators, self.distance_to_end)).T
            reward = self.compute_reward(next_hop)
            return self.node_features.to(device), reward.to(device), self.routing_done
        else:
            return self.node_features.to(device), torch.tensor([[-1]]).to(device), False
    
    def get_action_mask(self):
        action_mask = copy.deepcopy(self.adj_matrix[self.current_node])
        action_mask = F.pad(action_mask, (0, self.n_nodes - action_mask.size(0)), "constant", 0).to(device)
        return action_mask

    def get_adj_matrix(self):
        return copy.deepcopy(self.adj_matrix).to(device)
    
    def get_edge_index(self):
        return copy.deepcopy(self.edge_index).to(device)
        
    def compute_reward(self, next_hop):
        this_dist_2_end = self.distance_to_end[self.current_node]
        next_dist_2_end = self.distance_to_end[next_hop]
        reward = (this_dist_2_end - next_dist_2_end) / self.norm_xy
        if next_hop == self.end_node:
            self.routing_done = True
            return reward + self.hop_thresh * 2
        else:
            return -1 + reward
        
    def sim_done(self):
        """
        function: get the done state of simulation.
        """
        return not (shouldContinueSim() and self.step_counter <= self.max_steps)

In [302]:
class GDQN(nn.Module):
    def __init__(self, in_channels=5, n_nodes=57, hidden_dim=32, dropout=0.1):
        super(GDQN, self).__init__()
        self.convs1 = GCNConv(in_channels, hidden_dim)
        self.convs2 = GCNConv(hidden_dim, hidden_dim)
        self.convs3 = GCNConv(hidden_dim, hidden_dim)
        self.convs4 = GCNConv(hidden_dim, hidden_dim)
        self.convs5 = GCNConv(hidden_dim, 1)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(n_nodes, n_nodes)
        self.softmax = nn.Softmax(dim=1)
        self.relu = nn.ReLU()

    def forward(self, x, edge_index, action_mask):
        x = self.convs1(x, edge_index)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.convs2(x, edge_index)
        x = self.relu(x)
        x = self.convs3(x, edge_index)
        x = self.relu(x)
        x = self.convs4(x, edge_index)
        x = self.relu(x)
        x = self.convs5(x, edge_index)
        x = self.relu(x)
        x = x.T
        x = self.fc1(x)
        x = x * action_mask
        x = self.softmax(x)
        return x



Transition = namedtuple('Transition',
                        ('state', 'edge_index', 'action_mask', 'action', 'next_state', 'reward'))


class ReplayMemory(object):

    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        """Save a transition"""
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)


In [303]:
BATCH_SIZE = 128
GAMMA = 0.99
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 1000
TAU = 0.005
LR = 1e-4


# Get number of actions from gym action space
n_nodes = 57
env = RoutingGym(sumoCmd, 1100, n_nodes)
# Get the number of state observations

policy_net = GDQN().to(device)
target_net = GDQN().to(device)
target_net.load_state_dict(policy_net.state_dict())

optimizer = optim.AdamW(policy_net.parameters(), lr=LR, amsgrad=True)
memory = ReplayMemory(1000)


steps_done = 0


def select_action(node_features, edge_index, action_mask):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if sample > eps_threshold:
        with torch.no_grad():
            # t.max(1) will return the largest column value of each row.
            # second column on max result is index of where max element was
            # found, so we pick action with the larger expected reward.
            return policy_net(node_features, edge_index, action_mask).max(1).indices.view(1, 1)
    else:
        action_space = torch.where(action_mask == 1)[0]
        return torch.tensor([[random.choice(action_space)]], device=device, dtype=torch.long)

In [304]:
def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
    # detailed explanation). This converts batch-array of Transitions
    # to Transition of batch-arrays.
    batch = Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    # (a final state would've been the one after which simulation ended)
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                          batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state
                                                if s is not None])
    state_batch = torch.stack(batch.state)
    action_batch = torch.stack(batch.action)
    reward_batch = torch.stack(batch.reward)
    edge_index_batch = torch.stack(batch.edge_index)
    action_mask_batch = torch.stack(batch.action_mask)
  

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken. These are the actions which would've been taken
    # for each batch state according to policy_net
    print(state_batch.shape, edge_index_batch.shape, action_mask_batch.shape)
    state_action_values = policy_net(state_batch, edge_index_batch, action_mask_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states.
    # Expected values of actions for non_final_next_states are computed based
    # on the "older" target_net; selecting their best reward with max(1).values
    # This is merged based on the mask, such that we'll have either the expected
    # state value or 0 in case the state was final.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    with torch.no_grad():
        next_state_values[non_final_mask] = target_net(non_final_next_states).max(1).values
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Compute Huber loss
    criterion = nn.SmoothL1Loss()
    loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    # In-place gradient clipping
    torch.nn.utils.clip_grad_value_(policy_net.parameters(), 100)
    optimizer.step()


In [305]:
# Initialize the environment and get its state
env.reset()
done = False
while not done:
    done = env.sim_done()
    state = env.step()
    edge_index = env.get_edge_index()
    routing_done = False
    while not routing_done:
        action_mask = env.get_action_mask()
        action = select_action(state, edge_index, action_mask)
        node_features, reward, routing_done = env.act(action.item())
        reward = torch.tensor([reward], device=device)

        if routing_done:
            next_state = None
        else:
            next_state = node_features

        # Store the transition in memory
        memory.push(state, edge_index, action_mask, action, next_state, reward)

        # Move to the next state
        state = next_state

        # Perform one step of the optimization (on the policy network)
        optimize_model()

        # Soft update of the target network's weights
        # θ′ ← τ θ + (1 −τ )θ′
        target_net_state_dict = target_net.state_dict()
        policy_net_state_dict = policy_net.state_dict()
        for key in policy_net_state_dict:
            target_net_state_dict[key] = policy_net_state_dict[key]*TAU + target_net_state_dict[key]*(1-TAU)
        target_net.load_state_dict(target_net_state_dict)

print('Complete')
plt.ioff()
plt.show()


Simulation ended at time: 401.00
Reason: TraCI requested termination.
Performance: 
 Duration: 157.26s
 TraCI-Duration: 0.01s
 Real time factor: 2.5499
 UPS: 54.177450
Vehicles: 
 Inserted: 45 (Loaded: 48)
 Running: 39
 Waiting: 0
Statistics (avg of 6):
 RouteLength: 1965.69
 Speed: 10.02
 Duration: 203.50
 WaitingTime: 1.83
 TimeLoss: 23.67
 DepartDelay: 0.35

 Retrying in 1 seconds
***Starting server on port 46305 ***
Loading net-file from './config/osm.net.xml.gz' ... done (109ms).
Loading done.
Simulation version 1.20.0 started with time: 0.00.
torch.Size([128, 57, 5]) torch.Size([128, 2, 143]) torch.Size([128, 57])


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 128 but got size 2 for tensor number 1 in the list.

In [308]:
import torch
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

# Step 1: Prepare individual graph data objects
graphs = []
for i in range(128):  # Assuming we have 128 graphs
    # Example edge index and node features for each graph
    edge_index = torch.tensor([[0, 1, 1, 2],
                               [1, 0, 2, 1]], dtype=torch.long)
    if i == 1:
        edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],
                                   [1, 0, 2, 1, 3, 2]], dtype=torch.long)
    elif i == 2:
        edge_index = torch.tensor([[0],
                               [1]], dtype=torch.long)
    x = torch.tensor([[1, 2],
                      [3, 4],
                      [5, 6]], dtype=torch.float)
    
    # Create a Data object for each graph
    data = Data(x=x, edge_index=edge_index)
    graphs.append(data)

# Step 2: Create a DataLoader for batching
loader = DataLoader(graphs, batch_size=32, shuffle=True)  # Adjust batch_size as needed

# Step 3: Define a simple GCN model for demonstration
class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels=2, out_channels=16)
        self.conv2 = GCNConv(in_channels=16, out_channels=2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Instantiate the model
model = GCN()

# Step 4: Iterate over the DataLoader and pass batches through the model
for batch in loader:
    out = model(batch)
    print(out)
    # You can now perform loss computation, backpropagation, etc.

tensor([[-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1.4015],
        [-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1.4015],
        [-0.3355, -0.9334],
        [-0.4502, -1.1951],
        [-0.5298, -1.3113],
        [-0.5541, -1.3516],
        [-0.6733, -1.5651],
        [-0.6088, -1.3865],
        [-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1.4015],
        [-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1.4015],
        [-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1.4015],
        [-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1.4015],
        [-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1.4015],
        [-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1.4015],
        [-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1.4015],
        [-0.3666, -0.9978],
        [-0.5948, -1.4612],
        [-0.6101, -1