In [2]:
import numpy as np
import math
import heapq
import ray
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from gym.spaces import Discrete, MultiDiscrete
from ray import tune, air
from ray.rllib.algorithms.ppo import PPOConfig
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from ray.tune.registry import register_env
from ray.rllib.utils.pre_checks.env import check_env

In [60]:
# define the custom routing grid environment

# action space encoding
    # 0 - > move up
    # 1 - > move down
    # 2 - > move right
    # 3 - > move left
# state (observation) space encoding
    # state[0:1] = current agent position
    # state[2:3] = current goal position
    # state[4:7] = capacities of the four neighboring edges of the current agent position. Namely, up, right, down, and left


class RtGridEnv(MultiAgentEnv):
    def __init__(self, length:int, width:int, nets:list, macros:list, edge_capacity:np.ndarray, max_step:int):
        """
        Args:
            length (int): length of the canvas
            width (int): width of the canvas
            nets (list): a list of nets to be routed
            macros (list): a list of macros that has been placed on the canvas by placement
        """
        self.length = length
        self.width = width
        self.nets = nets
        self.n_nets = len(self.nets)
        self.macros = macros
        self.initial_capacity = edge_capacity.copy()
        self.initial_capacity.setflags(write=False)
        self.edge_capacity = edge_capacity.copy()
        self.max_capacity = np.max(self.edge_capacity) + 1 # plus one to account for the behavior of gym.MultiDiscrete
        self.max_step = max_step
        self.step_counter = 0 # counts the number of steps elapsed for the current episode
        
        self.agents_id = []
        for i in range(self.n_nets):
            self.agents_id.append("agent_{}".format(i))
        self.state = {}
        self.agent_position = {}
        self.goal_position = {}
        self.change_pin_flag = self.reset_flags({})
        self.done_flag = self.reset_flags({})
        #print("change pin flags ", self.change_pin_flag)
        # the done flag needs an additional "__all__" key to indicate all agents are done
        self.done_flag["__all__"] = False
        #print("done flags ", self.done_flag)
        self.pin_counter = {}
        self.reset_pin_counters()
        self.path_x = self.generate_path(self.nets)
        self.path_y = self.generate_path(self.nets)
        #print(self.path_x)
        self.decomposed_nets = {}
        for i in range(self.n_nets): 
            self.decomposed_nets[self.agents_id[i]] = self.prim_mst(self.nets[i])
        
        # initialize the agent to route the first 2-pin net decomposed from the first multi-pin net
        for agent_id in self.agents_id:
            self.update_positions(agent_id)
            self.update_path(agent_id)
        #print(self.agent_position)
        #print(self.goal_position)
        # initialize the path lists
        #for agent_id in self.agents_id:
            #self.update_path(agent_id)
        #print(self.path_x)
        #print(self.path_y)
        # define the action and the observation space
        self.action_space = Discrete(4)
        self.observation_space = MultiDiscrete(
            [
                self.length, 
                self.width, 
                self.length, 
                self.width, 
                self.max_capacity, 
                self.max_capacity, 
                self.max_capacity, 
                self.max_capacity
            ]
        )

    def update_positions(self, agent_id:str):
        """
        Update the agent position with the starting pin of the next 2-pin net.
        Update the goal position with the new goal.
        """
        self.agent_position[agent_id] = np.array(self.decomposed_nets[agent_id]['u'][self.pin_counter[agent_id]])
        self.goal_position[agent_id] = np.array(self.decomposed_nets[agent_id]['v'][self.pin_counter[agent_id]])

    def update_path(self, agent_id:str):
        """Update the path agent has traveled."""
        self.path_x[agent_id][self.pin_counter[agent_id]].append(self.agent_position[agent_id][0])
        self.path_y[agent_id][self.pin_counter[agent_id]].append(self.agent_position[agent_id][1])

    def reset_pin_counters(self):
        """Set the pin counter of each agent to 0."""
        for i in range(self.n_nets):
            self.pin_counter[self.agents_id[i]] = 0
    
    def reset_flags(self, flags:dict):
        for i in range(self.n_nets):
            flags[self.agents_id[i]] = False

        return flags

    def generate_path(self, nets:list):
        """Generate the list data structure to hold the path traveled by the agent."""
        path = {}
        for i in range(len(nets)):
            path[self.agents_id[i]] = []
            for j in range(len(nets[i])-1):
                path[self.agents_id[i]].append([])

        return path

    def prim_mst(self, pins):
        """
        Compute the Minimum Spanning Tree (MST) using Prim's algorithm.

        Args:
            pins (list): List of (x, y) coordinates representing the pin locations.

        Returns:
            dict: a dictionary containing the vertices of all the edges in the MST

        Note:
            - The pins list should contain at least two points.
        """

        def euclidean_distance(p1, p2):
            """
            Compute the Euclidean distance between two points.

            Args:
                p1 (tuple): First point (x, y) coordinates.
                p2 (tuple): Second point (x, y) coordinates.

            Returns:
                float: Euclidean distance between the two points.
            """
            x1, y1 = p1
            x2, y2 = p2
            return math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
        
        distances = {}
        for i in range(len(pins)):
            for j in range(i+1, len(pins)):
                p1 = pins[i]
                p2 = pins[j]
                distances[(i, j)] = euclidean_distance(p1, p2)
                distances[(j, i)] = distances[(i, j)]  # Add symmetric distance
        
        # Initialize
        num_pins = len(pins)
        visited = [False] * num_pins
        mst_u = []
        mst_v = []
        start_vertex = 0
        visited[start_vertex] = True
        
        # Create a priority queue
        pq = []
        
        # Mark the initial vertex as visited
        for i in range(num_pins):
            if i != start_vertex:
                heapq.heappush(pq, (distances[(start_vertex, i)], start_vertex, i))
        
        # Update the priority queue and perform Prim's algorithm
        while pq:
            if (len(mst_u) == len(pins) -1): # for n pins, the MST should at most have n-1 edges
                break

            weight, u, v = heapq.heappop(pq)
            
            if visited[v]:
                #print(f"Skipping edge: {weight} - {u} - {v}")
                continue
            
            # Prim's algorithm iteration
            visited[v] = True
            mst_u.append(pins[u])
            mst_v.append(pins[v])
            
            for i in range(num_pins):
                if not visited[i]:
                    heapq.heappush(pq, (distances[(v, i)], v, i))
        
        mst = {'u':mst_u,'v':mst_v}

        return mst
    
    def update_capacity(self, agent_position:np.ndarray, action:int):
        """
        Update the edge capacities after taking an action
        """

        # reduce the capacity of the current node
        self.edge_capacity[agent_position[0]][agent_position[1]][action] += -1

        # reduce the capacity of the next node's corresponding edge
        new_node = self.compute_new_position(agent_position, action)
        corresponding_edge = (action + 2) % 4
        self.edge_capacity[new_node[0]][new_node[1]][corresponding_edge] += -1
        
    def compute_new_position(self, agent_position:np.ndarray, action:int):
        """
        Compute new agent positions
        """
        if action == 0: # up
            new_position = (agent_position[0], agent_position[1]+1)
        elif action == 1: # right
            new_position = (agent_position[0]+1, agent_position[1])
        elif action == 2: # down
            new_position = (agent_position[0], agent_position[1]-1)
        elif action == 3: # left
            new_position = (agent_position[0]-1, agent_position[1])
        
        return new_position
    
    def step(self, action:dict):
        def check_move_validity(agent_position:np.ndarray, action:int):
            """
            Check whether a move is valid by checking:
            (1) the edge the move is about to use has capacity greater than 0
            (2) the position after the move is not within macro regions
            (3) the position after the move in within in the routing canvas
            Returns True if the move is valid, False if invalid
            """
            # capacity of the 4 neighboring edges of the current agent position
            node_capacity = self.edge_capacity[agent_position[0]][agent_position[1]]

            new_position = self.compute_new_position(agent_position, action)

            macro_flag = new_position not in self.macros
            bound_flag = new_position[0] in range(self.length) and new_position[1] in range(self.width)
            capacity_flag = node_capacity[action] > 0

            valid = macro_flag and bound_flag and capacity_flag

            return valid
        
        # extract all the active agents in this time step
        active_agent = list(action.keys())
        #print("active agent ", active_agent)
        reward = {}

        # if we have reached our maximum time step, set the all done flag
        self.step_counter += 1
        if self.step_counter >= self.max_step:
            for agent_id in active_agent:
                reward[agent_id] = -1
            self.done_flag["__all__"] = True
            #print("reward ", reward)
            #print("done flags ", self.done_flag)
            return self.state, reward, self.done_flag, {}
        
        # update pins for those agents in need
        # Only agents that have the change pin flag set to True AND are active, will be updated
        pin_flag_agents = [key for key, value in self.change_pin_flag.items() if value]
        pin_flag_agents = list(set(active_agent).intersection(set(pin_flag_agents)))
        for agent_id in pin_flag_agents:
            #print(agent_id)
            self.update_positions(agent_id)
            self.update_path(agent_id)
            reward[agent_id] = 0
            self.change_pin_flag[agent_id] = False
            self.state[agent_id] = np.concatenate([
                self.agent_position[agent_id], 
                self.goal_position[agent_id], 
                self.edge_capacity[self.agent_position[agent_id][0]][self.agent_position[agent_id][1]]
                ])
            active_agent.remove(agent_id) # de-active agents that undergoes pin-upgrading, such that they won't be unintentionally accessed
        #TODO: COMPUTE NEW STATE AND REWARD FOR ACTIVE AGENTS
        for agent_id in active_agent:
            if check_move_validity(self.agent_position[agent_id], action[agent_id]):
                self.update_capacity(self.agent_position[agent_id], action[agent_id])
                self.agent_position[agent_id] = np.array(list(self.compute_new_position(self.agent_position[agent_id], action[agent_id])))
                self.update_path(agent_id)

            if np.array_equal(self.agent_position[agent_id], self.goal_position[agent_id]):
                reward[agent_id] = 1000
                self.update_counters(agent_id)
            else:
                reward[agent_id] = -1
            
            self.state[agent_id] = np.concatenate([
                self.agent_position[agent_id], 
                self.goal_position[agent_id], 
                self.edge_capacity[self.agent_position[agent_id][0]][self.agent_position[agent_id][1]]
                ])
        # if all agents are done, set the __all__ flag
        self.done_flag["__all__"] = all(self.done_flag[agent_key] for agent_key in self.done_flag if agent_key.startswith('agent_'))
        
        return self.state, reward, self.done_flag, {}
        #print("state ", self.state)
        #print("change pin flag ", self.change_pin_flag)
        #print("reward ", reward)

    def update_counters(self, agent_id:str):
        # one 2-pin net within one multi-pin net is done
        self.pin_counter[agent_id] += 1
        self.change_pin_flag[agent_id] = True
        net_id = int(agent_id.split("_")[1])

        if self.pin_counter[agent_id] == len(self.nets[net_id]) - 1:
            # this agent is done, it has routed all the pins
            #self.pin_counter[agent_id] = 0
            self.change_pin_flag[agent_id] = False
            self.done_flag[agent_id] = True
    
    def render(self):
        pass

    def reset(self):
        #print("RESETTING...")
        self.reset_pin_counters()
        #print("pin counters ", self.pin_counter)
        self.step_counter = 0
        self.change_pin_flag = self.reset_flags(self.change_pin_flag)
        self.done_flag = self.reset_flags(self.done_flag)
        self.done_flag["__all__"] = False
        #print("change pin flags ", self.change_pin_flag)
        #print("done flags ", self.done_flag)
        self.edge_capacity = self.initial_capacity.copy()
        self.path_x = self.generate_path(self.nets)
        self.path_y = self.generate_path(self.nets)
        #print("path x ", self.path_x)
        #print("path y ", self.path_y)
        for agent_id in self.agents_id:
            self.update_positions(agent_id)
            self.update_path(agent_id)
        #for agent_id in self.agents_id:
            #self.update_path(agent_id)
        #print("agent positions ", self.agent_position)
        #print("goal positions ", self.goal_position)
        #print("path x ", self.path_x)
        #print("path y ", self.path_y)
        for agent_id in self.agents_id:
            individual_state = np.concatenate([
                self.agent_position[agent_id], 
                self.goal_position[agent_id], 
                self.edge_capacity[self.agent_position[agent_id][0]][self.agent_position[agent_id][1]]
                ])
            self.state[agent_id] = individual_state
        
        return self.state


In [61]:
nets = [[(2,1), (2,3), (3,3)],[(1,4), (3,4), (3,2)], [(1,5), (1,2)], [(0,5), (3,5), (2,2)], [(3,1), (4,3)], [(0,0), (5,0), (5,5)]]
macros = [(0,2), (0,4)]
length = 6
width = 6
n_nets = len(nets)
edge_capacity = np.full((length,width,4),n_nets)
max_step = 1000

env = RtGridEnv(length, width, nets, macros, edge_capacity, max_step)
obs = env.reset()
print(obs)

'''env.step_counter = 999
env.pin_counter = {"agent_0":2, "agent_1":1, "agent_2":0, "agent_3":0, "agent_4":0, "agent_5":0}
env.change_pin_flag = {"agent_0":True, "agent_1":True, "agent_2":False, "agent_3":False, "agent_4":False, "agent_5":False}
action = {"agent_0":0, "agent_1":1, "agent_2":2, "agent_3":3, "agent_4":0}
env.step(action)'''

{'agent_0': array([2, 1, 2, 3, 6, 6, 6, 6]), 'agent_1': array([1, 4, 3, 4, 6, 6, 6, 6]), 'agent_2': array([1, 5, 1, 2, 6, 6, 6, 6]), 'agent_3': array([0, 5, 3, 5, 6, 6, 6, 6]), 'agent_4': array([3, 1, 4, 3, 6, 6, 6, 6]), 'agent_5': array([0, 0, 5, 0, 6, 6, 6, 6])}


'env.step_counter = 999\nenv.pin_counter = {"agent_0":2, "agent_1":1, "agent_2":0, "agent_3":0, "agent_4":0, "agent_5":0}\nenv.change_pin_flag = {"agent_0":True, "agent_1":True, "agent_2":False, "agent_3":False, "agent_4":False, "agent_5":False}\naction = {"agent_0":0, "agent_1":1, "agent_2":2, "agent_3":3, "agent_4":0}\nenv.step(action)'

In [62]:
action = {"agent_0":0, "agent_1":1, "agent_2":2, "agent_3":1, "agent_4":1, "agent_5":1}
obs, reward, done, info = env.step(action)
print("obs ", obs)
print("reward", reward)
print("done", done)



obs  {'agent_0': array([2, 2, 2, 3, 6, 6, 5, 6]), 'agent_1': array([2, 4, 3, 4, 6, 6, 6, 5]), 'agent_2': array([1, 4, 1, 2, 5, 5, 6, 6]), 'agent_3': array([1, 5, 3, 5, 6, 6, 5, 5]), 'agent_4': array([4, 1, 4, 3, 6, 6, 6, 5]), 'agent_5': array([1, 0, 5, 0, 6, 6, 6, 5])}
reward {'agent_0': -1, 'agent_1': -1, 'agent_2': -1, 'agent_3': -1, 'agent_4': -1, 'agent_5': -1}
done {'agent_0': False, 'agent_1': False, 'agent_2': False, 'agent_3': False, 'agent_4': False, 'agent_5': False, '__all__': False}


In [63]:
action = {"agent_0":0, "agent_1":1, "agent_2":2, "agent_3":1, "agent_4":0, "agent_5":1}
obs, reward, done, info = env.step(action)
print("obs ", obs)
print("reward", reward)
print("done", done)

obs  {'agent_0': array([2, 3, 2, 3, 6, 6, 5, 6]), 'agent_1': array([3, 4, 3, 4, 6, 6, 6, 5]), 'agent_2': array([1, 3, 1, 2, 5, 6, 6, 6]), 'agent_3': array([2, 5, 3, 5, 6, 6, 6, 5]), 'agent_4': array([4, 2, 4, 3, 6, 6, 5, 6]), 'agent_5': array([2, 0, 5, 0, 6, 6, 6, 5])}
reward {'agent_0': 1000, 'agent_1': 1000, 'agent_2': -1, 'agent_3': -1, 'agent_4': -1, 'agent_5': -1}
done {'agent_0': False, 'agent_1': False, 'agent_2': False, 'agent_3': False, 'agent_4': False, 'agent_5': False, '__all__': False}


In [64]:
action = {"agent_0":0, "agent_1":1, "agent_2":2, "agent_3":1, "agent_4":0, "agent_5":1}
obs, reward, done, info = env.step(action)
print("obs ", obs)
print("reward", reward)
print("done", done)

obs  {'agent_0': array([2, 3, 3, 3, 6, 6, 5, 6]), 'agent_1': array([3, 4, 3, 2, 6, 6, 6, 5]), 'agent_2': array([1, 2, 1, 2, 5, 6, 6, 6]), 'agent_3': array([3, 5, 3, 5, 6, 6, 6, 5]), 'agent_4': array([4, 3, 4, 3, 6, 6, 5, 6]), 'agent_5': array([3, 0, 5, 0, 6, 6, 6, 5])}
reward {'agent_1': 0, 'agent_0': 0, 'agent_2': 1000, 'agent_3': 1000, 'agent_4': 1000, 'agent_5': -1}
done {'agent_0': False, 'agent_1': False, 'agent_2': True, 'agent_3': False, 'agent_4': True, 'agent_5': False, '__all__': False}


In [65]:
action = {"agent_0":1, "agent_1":2,  "agent_3":1, "agent_5":1}
obs, reward, done, info = env.step(action)
print("obs ", obs)
print("reward", reward)
print("done", done)

obs  {'agent_0': array([3, 3, 3, 3, 6, 6, 6, 5]), 'agent_1': array([3, 3, 3, 2, 5, 6, 6, 5]), 'agent_2': array([1, 2, 1, 2, 5, 6, 6, 6]), 'agent_3': array([3, 5, 2, 2, 6, 6, 6, 5]), 'agent_4': array([4, 3, 4, 3, 6, 6, 5, 6]), 'agent_5': array([4, 0, 5, 0, 6, 6, 6, 5])}
reward {'agent_3': 0, 'agent_0': 1000, 'agent_1': -1, 'agent_5': -1}
done {'agent_0': True, 'agent_1': False, 'agent_2': True, 'agent_3': False, 'agent_4': True, 'agent_5': False, '__all__': False}


In [66]:
action = { "agent_1":2,  "agent_3":3, "agent_5":1}
obs, reward, done, info = env.step(action)
print("obs ", obs)
print("reward", reward)
print("done", done)

obs  {'agent_0': array([3, 3, 3, 3, 6, 6, 6, 5]), 'agent_1': array([3, 2, 3, 2, 5, 6, 6, 6]), 'agent_2': array([1, 2, 1, 2, 5, 6, 6, 6]), 'agent_3': array([2, 5, 2, 2, 6, 4, 6, 5]), 'agent_4': array([4, 3, 4, 3, 6, 6, 5, 6]), 'agent_5': array([5, 0, 5, 0, 6, 6, 6, 5])}
reward {'agent_1': 1000, 'agent_3': -1, 'agent_5': 1000}
done {'agent_0': True, 'agent_1': True, 'agent_2': True, 'agent_3': False, 'agent_4': True, 'agent_5': False, '__all__': False}


In [67]:
action = {"agent_3":2, "agent_5":2}
obs, reward, done, info = env.step(action)
print("obs ", obs)
print("reward", reward)
print("done", done)

obs  {'agent_0': array([3, 3, 3, 3, 6, 6, 6, 5]), 'agent_1': array([3, 2, 3, 2, 5, 6, 6, 6]), 'agent_2': array([1, 2, 1, 2, 5, 6, 6, 6]), 'agent_3': array([2, 4, 2, 2, 5, 5, 6, 5]), 'agent_4': array([4, 3, 4, 3, 6, 6, 5, 6]), 'agent_5': array([5, 0, 5, 5, 6, 6, 6, 5])}
reward {'agent_5': 0, 'agent_3': -1}
done {'agent_0': True, 'agent_1': True, 'agent_2': True, 'agent_3': False, 'agent_4': True, 'agent_5': False, '__all__': False}


In [68]:
action = {"agent_3":2, "agent_5":1}
obs, reward, done, info = env.step(action)
print("obs ", obs)
print("reward", reward)
print("done", done)

obs  {'agent_0': array([3, 3, 3, 3, 6, 6, 6, 5]), 'agent_1': array([3, 2, 3, 2, 5, 6, 6, 6]), 'agent_2': array([1, 2, 1, 2, 5, 6, 6, 6]), 'agent_3': array([2, 3, 2, 2, 5, 5, 5, 6]), 'agent_4': array([4, 3, 4, 3, 6, 6, 5, 6]), 'agent_5': array([5, 0, 5, 5, 6, 6, 6, 5])}
reward {'agent_3': -1, 'agent_5': -1}
done {'agent_0': True, 'agent_1': True, 'agent_2': True, 'agent_3': False, 'agent_4': True, 'agent_5': False, '__all__': False}


In [69]:
action = {"agent_3":2, "agent_5":0}
obs, reward, done, info = env.step(action)
print("obs ", obs)
print("reward", reward)
print("done", done)

obs  {'agent_0': array([3, 3, 3, 3, 6, 6, 6, 5]), 'agent_1': array([3, 2, 3, 2, 5, 6, 6, 6]), 'agent_2': array([1, 2, 1, 2, 5, 6, 6, 6]), 'agent_3': array([2, 2, 2, 2, 4, 6, 5, 6]), 'agent_4': array([4, 3, 4, 3, 6, 6, 5, 6]), 'agent_5': array([5, 1, 5, 5, 6, 6, 5, 6])}
reward {'agent_3': 1000, 'agent_5': -1}
done {'agent_0': True, 'agent_1': True, 'agent_2': True, 'agent_3': True, 'agent_4': True, 'agent_5': False, '__all__': False}


In [76]:
action = {"agent_5":0}
obs, reward, done, info = env.step(action)
print("obs ", obs)
print("reward", reward)
print("done", done)

obs  {'agent_0': array([3, 3, 3, 3, 6, 6, 6, 5]), 'agent_1': array([3, 2, 3, 2, 5, 6, 6, 6]), 'agent_2': array([1, 2, 1, 2, 5, 6, 6, 6]), 'agent_3': array([2, 2, 2, 2, 4, 6, 5, 6]), 'agent_4': array([4, 3, 4, 3, 6, 6, 5, 6]), 'agent_5': array([5, 0, 5, 5, 5, 6, 6, 5])}
reward {'agent_5': 0}
done {'agent_0': True, 'agent_1': True, 'agent_2': True, 'agent_3': True, 'agent_4': True, 'agent_5': True, '__all__': True}


In [77]:
print(env.path_x)
print(env.path_y)

{'agent_0': [[2, 2, 2], [2, 3]], 'agent_1': [[1, 2, 3], [3, 3, 3]], 'agent_2': [[1, 1, 1, 1]], 'agent_3': [[0, 1, 2, 3], [3, 2, 2, 2, 2]], 'agent_4': [[3, 4, 4, 4]], 'agent_5': [[0, 1, 2, 3, 4, 5], [5, 5, 5, 5, 5, 5, 5]]}
{'agent_0': [[1, 2, 3], [3, 3]], 'agent_1': [[4, 4, 4], [4, 3, 2]], 'agent_2': [[5, 4, 3, 2]], 'agent_3': [[5, 5, 5, 5], [5, 5, 4, 3, 2]], 'agent_4': [[1, 1, 2, 3]], 'agent_5': [[0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 4, 5, 0]]}


In [79]:
obs = env.reset()
print(obs)
print(env.path_x)
print(env.path_y)
print(env.done_flag)
print(env.pin_counter)
print(env.change_pin_flag)

{'agent_0': array([2, 1, 2, 3, 6, 6, 6, 6]), 'agent_1': array([1, 4, 3, 4, 6, 6, 6, 6]), 'agent_2': array([1, 5, 1, 2, 6, 6, 6, 6]), 'agent_3': array([0, 5, 3, 5, 6, 6, 6, 6]), 'agent_4': array([3, 1, 4, 3, 6, 6, 6, 6]), 'agent_5': array([0, 0, 5, 0, 6, 6, 6, 6])}
{'agent_0': [[2], []], 'agent_1': [[1], []], 'agent_2': [[1]], 'agent_3': [[0], []], 'agent_4': [[3]], 'agent_5': [[0], []]}
{'agent_0': [[1], []], 'agent_1': [[4], []], 'agent_2': [[5]], 'agent_3': [[5], []], 'agent_4': [[1]], 'agent_5': [[0], []]}
{'agent_0': False, 'agent_1': False, 'agent_2': False, 'agent_3': False, 'agent_4': False, 'agent_5': False, '__all__': False}
{'agent_0': 0, 'agent_1': 0, 'agent_2': 0, 'agent_3': 0, 'agent_4': 0, 'agent_5': 0}
{'agent_0': False, 'agent_1': False, 'agent_2': False, 'agent_3': False, 'agent_4': False, 'agent_5': False}


In [4]:
#nets = [[(2,1), (2,3), (3,3), (4,2)],[(1,4), (3,4), (4,4)]]
#macros = [(0,2), (1,2)]
nets = [[(2,1), (2,3), (3,3), (4,2), (0,3),(0,1)],[(1,4), (3,4), (3,2)], [(1,5), (1,2), (1,1), (3,1)], [(0,5), (3,5), (2,2)], [(3,1), (4,3)], [(0,0), (5,0), (5,5), (4,5), (4,4)]]
macros = [(0,2), (0,4)]
length = 6
width = 6
n_nets = len(nets)
edge_capacity = np.full((length,width,4),n_nets)
max_step = 1000

env = RtGridEnv(length, width, nets, macros, edge_capacity, max_step)
num_episodes = 3

for ep in range(num_episodes):
    total_reward = 0
    obs = env.reset()
    print("resetting...")
    print(obs)
    done = False
    while True:
        action = env.action_space.sample()
        #print(action)
        new_obs, reward, done, info = env.step(action)
        #print(new_obs)
        total_reward += reward
            
        #print(f"episode: {ep}")
        #print(f"obs: {new_obs}, reward: {total_reward}, done: {done}")

        if done:
            break
    print(total_reward)
    env.render()
    env.heatmap()
    #print(env.edge_capacity)

change pin flags  {'agent_0': False, 'agent_1': False, 'agent_2': False, 'agent_3': False, 'agent_4': False, 'agent_5': False}
done flags  {'agent_0': False, 'agent_1': False, 'agent_2': False, 'agent_3': False, 'agent_4': False, 'agent_5': False, '__all__': False}
agent positions  {'agent_0': array([2, 1]), 'agent_1': array([1, 4]), 'agent_2': array([1, 5]), 'agent_3': array([0, 5]), 'agent_4': array([3, 1]), 'agent_5': array([0, 0])}
goal positions  {'agent_0': array([2, 3]), 'agent_1': array([3, 4]), 'agent_2': array([1, 2]), 'agent_3': array([3, 5]), 'agent_4': array([4, 3]), 'agent_5': array([5, 0])}
path x  {'agent_0': [[2], [], [], [], []], 'agent_1': [[1], []], 'agent_2': [[1], [], []], 'agent_3': [[0], []], 'agent_4': [[3]], 'agent_5': [[0], [], [], []]}
path y  {'agent_0': [[1], [], [], [], []], 'agent_1': [[4], []], 'agent_2': [[5], [], []], 'agent_3': [[5], []], 'agent_4': [[1]], 'agent_5': [[0], [], [], []]}
resetting...
{'agent_0': array([2, 1, 2, 3, 6, 6, 6, 6]), 'agent_

AttributeError: 'int' object has no attribute 'keys'