NOTES:

In [None]:
from sage.all import graphs
import copy, sys, math


def available_squares(cop_pos: list, robber_pos: tuple) -> list:
    #Function that returns the set of squares that are available to the robber
    #return the adjacent vertices of robber_pos \ adjacent vertices of cops    
    c_neighbors = set(cop_pos)
    for cop in cop_pos:
        c_neighbors = c_neighbors.union(set(G.neighbors(cop)))

    if robber_pos == -1: 
        return set(G.vertices()) - c_neighbors
    else:
        r_neighbors = set(G.neighbors(robber_pos)).union({robber_pos})
        return r_neighbors - c_neighbors

def get_axis(cop_pos, robber_pos):
    #returns 'h', 'v', 'posd', 'negd' or none for axis which a cop position occupies
    #assuming given correct input ie intersecting but not same posn
    if cop_pos == robber_pos:
        return 'h'
        
    if robber_pos[0] == cop_pos[0]:
        return 'v'
    elif robber_pos[1] == cop_pos[1]:
        return 'h'
    elif cop_pos[0] + cop_pos[1] == robber_pos[0] + robber_pos[1]:
        return 'negd'
    elif cop_pos[0] - cop_pos[1] == robber_pos[0] - robber_pos[1]:
        return 'posd'
    else:
        return None

def remove_axes_squares(robber_pos, avail, occupied_axes):
    #filters out the squares which are on a cop occupied axis from a list of vertices
    for axis in map(lambda a: a[0], set(filter(lambda a: a[1], occupied_axes.items()))):
        if axis == 'h':
            #remove all with move[1] == robber_pos[1]
            avail = set(filter(lambda posn: posn[1] != robber_pos[1], avail))
        elif axis == 'v':
            #remove all with move[0] == robber_pos[0]
            avail = set(filter(lambda posn: posn[0] != robber_pos[0], avail))
        elif axis == 'negd':
            #remove all w/move[0]+move[1] == robber_pos[0]+robber_pos[1]
            avail = set(filter(lambda posn: posn[0] + posn[1] != robber_pos[0] + robber_pos[1], avail))
        elif axis == 'posd':
            #remove all w/move[0]-move[1] == robber_pos[0]-robber_pos[1]
            avail = set(filter(lambda posn: posn[0] - posn[1] != robber_pos[0] - robber_pos[1], avail))
    return avail

def get_intersecting_squares(cop_pos, robber_pos, occupied_axes):
    #compiles a list of the squares which cops can reach that directly attack the robber on a unique line
    avail = set(G.neighbors(cop_pos)).union({cop_pos}).intersection(
        set(G.neighbors(robber_pos)).union({robber_pos}))
    return remove_axes_squares(robber_pos, avail, occupied_axes)

def get_SD_length(robber_pos):
    #determines the length of the SD
    diff = robber_pos[0] - robber_pos[1]
    sum = robber_pos[0] + robber_pos[1]
    posd_len = len(list(filter(lambda posn: posn[0] - posn[1] == diff, G.vertices())))
    negd_len = len(list(filter(lambda posn: posn[0] + posn[1] == sum, G.vertices())))
    return min(posd_len, negd_len)

def get_min_LSD_move(robber_pos: tuple, moves: list) -> tuple:
    #determines the cop config which forces the minimum LSD
    min_LSD = sys.maxsize
    min_move = tuple()

    for move in moves:
        r_moves = available_squares(move[0], robber_pos)
        LSD = -1

        #get LSD of robber for this possible move
        for r_move in r_moves:
            SD_len = get_SD_length(r_move)
            if SD_len > LSD:
                LSD = SD_len
    
    if LSD < min_LSD:
        min_LSD = LSD
        min_move = move
    
    return min_move

def seenp(move: list, robber_pos: tuple, cop_states: list, robber_states: list)-> bool:
    if not (cop_states and robber_states): #empty states given for some reason
        return False
    
    for t in range(len(cop_states) - 1, -1, -1):
        if (set(cop_states[t]) == set(move) and robber_states[t] == robber_pos):
            print("SEEN")
            return True
        
    return False

#n^2 algo where n = # available moves, which is constant...
def remove_seen_moves(moves: list, robber_pos: tuple, cop_states: list, robber_states: list)-> list:
    #removes cop moves from a list of available cop moves if that state has already been seen

    if (len(cop_states) != len(robber_states)):
        raise Exception("given nonmatching states")
    

    new_moves = [(m,v) for (m,v) in moves
             if not seenp(m, robber_pos, cop_states, robber_states)]

    return new_moves

def get_closest_unoccupied(cop_pos: list, robber_pos: tuple, idx: int)-> tuple:
    V = G.neighbors(cop_pos[idx])

    min_dist = sys.maxsize
    min_pos = cop_pos[idx]
    
    for v in V:
        dist = math.sqrt((v[0] - robber_pos[0])**2 + (v[1] - robber_pos[1])**2)
        if dist < min_dist and v not in cop_pos:
            min_dist = dist
            min_pos = v

    return min_pos

def minimize_avail_helper(curr_cop_pos, robber_pos, i, occupied_axes, cop_moves, robber_moves):
    #goal is to find the minimizing config of cops
    #so track a min config and min robber avail squares
    #occupied axes is a dict, represents which robber axes are occupied in current backtracking iteration

    #base case i > #cops
    if i >= len(curr_cop_pos):
        return curr_cop_pos, len(available_squares(curr_cop_pos, robber_pos))

    avail = get_intersecting_squares(curr_cop_pos[i], robber_pos, occupied_axes)

    moves = list() # list(posn) -> avail_squares

    for move in avail:
        #find axes this occupies
        axis = get_axis(move, robber_pos)
        occupied_axes[axis] = True
        new_cop_pos = copy.deepcopy(curr_cop_pos)
        new_cop_pos[i] = move #move cop i
        
        curr_config, curr_squares = minimize_avail_helper(new_cop_pos, robber_pos, i+1, occupied_axes, cop_moves, robber_moves)
        moves.append((curr_config, curr_squares))
    
        occupied_axes[axis] = False

    #remove all moves which revisit board states
    moves = remove_seen_moves(moves, robber_pos, cop_moves, robber_moves)

    if not moves:
        #animal case-- go closer to the robber
        curr_cop_pos[i] = get_closest_unoccupied(curr_cop_pos, robber_pos, i)
        return minimize_avail_helper(curr_cop_pos, robber_pos, i+1, occupied_axes, cop_moves, robber_moves)
    
    #get squares with min # available squares for robber
    vals = map(lambda tup: tup[1], moves)
    min_val = min(vals)
    min_avail_moves = list(filter(lambda tup: tup[1] == min_val, moves))

    #sort by which config gives the max min SD
    best_move = get_min_LSD_move(robber_pos, min_avail_moves)
    
    return best_move[0], best_move[1]

def minimize_available(cop_pos: list, robber_pos: tuple, cop_states, robber_states) -> list:
    # Function that returns the move for the cops that minimizes the number of available squares for the robber
    #this function could be the combinatorially large one, but we are going to introduce our greedy heuristic
    #our strategy is such: the cops should always directly threaten a unique line of movement
    #get set of cop i available_moves \intersect set of robber 
    #filter out whichever are on occupied axes
    #use backtracking algorithm, recursively call min_avail_helper w/i+1, new cop_pos

    occupied_axes = {
        'h': False,
        'v': False,
        'negd': False,
        'posd': False
    }
    
    min_config, min_squares = minimize_avail_helper(cop_pos, robber_pos, 0, occupied_axes, cop_states, robber_states)
    
    return min_config

def maximize_available(cop_pos: list, cop_states, robber_states, robber_pos:tuple =-1, ) -> tuple: #-1 denotes no robber placed yet ie startin
    # Function that returns move for the robber that maximizes the number of squares for their next turn (assuming cops try to minimize)
    #get set of valid moves available_squares
    #for all moves m, call available_squares(cop_pos, m), get size of set
    #track max size and move, return that move
    #O(n)

    r_neighbors = available_squares(cop_pos, robber_pos)

    moves = dict() # move -> min cop move in anticipation
    
    for move in r_neighbors:
        #Q: here, do we want cop moves to take into account that cops wont repeat moves ?
        #at this point, cops are making suboptimal moves
        # i guess dont take into account, as robber doesnt care for repeating moves?
        cop_response = minimize_available(cop_pos, move, [], [])
        max_min_val = len(available_squares(cop_response, move))

        moves[move] = max_min_val
    
    if not moves:
        return robber_pos

    #sort by value descending, then by SD descending
    max_val = max(moves.values())
    max_avail_moves = {k: v for k, v in moves.items() if v == max_val}

    best_moves = sorted(
        max_avail_moves.items(),
        key=lambda item: -get_SD_length(item[0])
    )
        
    return best_moves[0][0]

def k_cop_win(cop_start, robber_start, itr, cop_states, robber_states):
    #returns true if cop win possible with k cops
    cop_move = minimize_available(cop_start, robber_start, cop_states, robber_states) # The cops try to minimize the available squares
    print("Cops move:", cop_move)
    cop_states.append(cop_move)
    robber_states.append(robber_start)
    avail_squares.append(len(available_squares(cop_move, robber_start)))
    robber_move = maximize_available(cop_move, cop_states, robber_states, robber_start) # The robber tries to maximize this minimum
    print("Robber moves to:", robber_move)
    print(avail_squares[-1], "squares available for after move", itr)

    cop_states.append(cop_move)
    robber_states.append(robber_move)
    
    # Checking if the cops have captured the robber
    if len(available_squares(cop_move, robber_move)) == 0:
        return True, cop_states, robber_states
    
    # If the cops can't decrease the number of available moves, they lose
    #if len(avail_squares) > 1 and avail_squares[-1] > avail_squares[-2]:
    #    print("available squares increased")
    #    return False

    if itr > n**2:
        print("iterations exceeded")
        return False, cop_states, robber_states
            
    # If the cop's haven't won yet, keep going
    return k_cop_win(cop_move, robber_move, itr+1, cop_states, robber_states)

In [None]:
#CODE FOR GENERATING ANIMAL/ROYAL GRAPHS GIVEN DIRECTIONS

def make_graph(n, slopes, animal=False):
    from sage.all import QQ, Infinity

    vertices = [(x, y) for x in range(n) for y in range(n)]
    G = Graph()
    G.add_vertices(vertices)

    for i, (x1, y1) in enumerate(vertices):
        # Convert slope list to exact rational numbers or Infinity
        D = set(QQ(s) if s != 'inf' else Infinity for s in slopes)
        for j in range(i+1, len(vertices)):
            x2, y2 = vertices[j]
            dx = x2 - x1
            dy = y2 - y1

            if dx == 0:
                slope = Infinity
            else:
                slope = QQ(dy) / QQ(dx)

            if slope in D:
                G.add_edge((x1, y1), (x2, y2))
                if animal:
                    D.remove(slope)

    return G


In [None]:
'''
EDIT THIS CODE TO CHANGE THE GRAPH
similar to evans code, just input slopes into a list and pass it into make_graph function (specify animal or royal w/bool)
'''

n=16
knight = [2, -2, 1/2, -1/2]
queen = [0, 'inf', 1, -1]
bishop = [1, -1]
idk = [1/3, -1/3, 3, 3]
G = make_graph(n, queen, False)


In [None]:
#n = 15
#T = n**2
#G = graphs.QueenGraph([n,n])

'''
run this code to run the above greedy algorithm on the graph defined above
define a list of tuples representing where you want your cops to start in (x,y) coords
then pass into play_game()

you could also use this to iteratively check the largest n for which k cops can win w/this algorithm in a loop
'''

avail_squares = list()

def play_game(cops_start):
    print(cops_start)
    robber_start = maximize_available(cops_start.copy(), [], [])
    robber_moves = [robber_start]
    cop_moves = [cops_start]
    print(f"rstart: {robber_start}, cops: {cops_start}")
    return k_cop_win(cops_start, robber_start, 1, cop_moves, robber_moves)

#6x6 domination
dom_start = math.floor((n+1)/2) - 3
dom_set = [(dom_start, dom_start), (dom_start + 4, dom_start + 2), (dom_start + 2, dom_start + 4)]

corner_start = [(0,0), (n-1,n-1), (0,n-1)]

two_cops = [(0,0), (n-1,n-1)]

mid = math.floor(n/2)
print(mid)
four_cops = [(mid, mid), (mid-1, mid), (mid-1,mid-1), (mid,mid-1)]

knight_diag = [(mid, mid), (mid+1, mid+1), (mid-1,mid-1)]

winp, cop_moves, robber_moves = play_game(knight_diag)
print("Cop win:", winp)
#print(cop_moves, robber_moves)

6
[(6, 6), (7, 7), (5, 5)]
rstart: (5, 9), cops: [(6, 6), (7, 7), (5, 5)]
Cops move: [(7, 8), (6, 9), (4, 7)]
Robber moves to: (6, 7)
6 squares available for after move 1
Cops move: [(8, 6), (4, 8), (5, 5)]
Robber moves to: (4, 6)
5 squares available for after move 2
Cops move: [(6, 5), (2, 7), (3, 4)]
Robber moves to: (5, 4)
5 squares available for after move 3
Cops move: [(7, 3), (3, 5), (4, 2)]
Robber moves to: (6, 6)
5 squares available for after move 4
Cops move: [(8, 5), (4, 7), (5, 4)]
Robber moves to: (4, 5)
5 squares available for after move 5
Cops move: [(6, 4), (2, 6), (3, 3)]
Robber moves to: (6, 6)
5 squares available for after move 6
SEEN
Cops move: [(8, 5), (4, 7), (4, 5)]
Robber moves to: (5, 4)
5 squares available for after move 7
Cops move: [(7, 3), (3, 5), (3, 3)]
Robber moves to: (6, 6)
5 squares available for after move 8
SEEN
SEEN
SEEN
Cops move: [(8, 5), (5, 4), (4, 5)]
Robber moves to: (7, 4)
5 squares available for after move 9
Cops move: [(9, 3), (6, 2), (6, 6

In [None]:
%pip install ipyevents

Collecting ipyevents
  Downloading ipyevents-2.0.2-py3-none-any.whl.metadata (2.9 kB)
Downloading ipyevents-2.0.2-py3-none-any.whl (101 kB)
Installing collected packages: ipyevents
Successfully installed ipyevents-2.0.2
Note: you may need to restart the kernel to use updated packages.


In [None]:
from sage.all import graphs
import networkx as nx
import matplotlib.pyplot as plt
from IPython.display import display
import ipywidgets as widgets
import math
from ipyevents import Event


def get_state(r_state, c_state):
    #get dict w/red = occupied by cops, blue = cops, black = robber, green = available for robber movement
    cop_occ = set()
    for cop in c_state:
        cop_occ = cop_occ.union(set(G.neighbors(cop)))

    cop_occ -= set(c_state)
    
    state = {
        'blue': set(c_state),
        'black': {r_state},
        'green': set(available_squares(c_state, r_state)) - {r_state},
        'red': cop_occ - {r_state}
    }
    return state

def convert_to_state(robber_moves, cop_moves):
    if len(robber_moves) != len(cop_moves):
        raise Exception("nonequal lists given")
    
    states = list()

    for state in range(len(robber_moves)):
        states.append(get_state(robber_moves[state], cop_moves[state]))

    return states

# Update function for each turn
def update(turn, states, nx_G, pos):
    fig, ax = plt.subplots(figsize=(12, 12))
    nx.draw(nx_G, pos, ax=ax, node_color='lightgrey', node_size=400, with_labels=False)

    for color, nodes in states[turn].items():
        nx.draw_networkx_nodes(nx_G, pos, nodelist=list(nodes), node_color=color, node_size=400, ax=ax)

    ax.set_title(f"Turn {math.floor(turn / 2) + 1}")
    ax.set_axis_off()
    plt.show()

'''
run this code to visualize moves made
'''

def display_game(G, cop_moves, robber_moves):
    n = int(math.sqrt(len(list(G.vertices()))))

    pos = {(i, j): (i, j) for i in range(n) for j in range(n)}
    G.set_pos(pos)
    nx_G = G.networkx_graph()

    states = convert_to_state(robber_moves, cop_moves)

    slider = widgets.IntSlider(min=0, max=len(states) - 1, step=1, value=0)
    
    out = widgets.interactive_output(
        update,
        {
            'turn': slider,
            'states': widgets.fixed(states),
            'nx_G': widgets.fixed(nx_G),
            'pos': widgets.fixed(pos)
        }
    )

    event = Event(source=slider, watched_events=['keydown'])

    def handle_event(event):
        if event['key'] == 'ArrowRight':
            slider.value = min(slider.max, slider.value + slider.step)
        elif event['key'] == 'ArrowLeft':
            slider.value = max(slider.min, slider.value - slider.step)

    event.on_dom_event(handle_event)

    display(slider, out)


In [None]:
display_game(G, cop_moves, robber_moves)

NameError: name 'cop_moves' is not defined

REINFORCEMENT LEARNING MODEL:

In [None]:
%pip install gymnasium

Collecting gymnasium
  Downloading gymnasium-1.1.1-py3-none-any.whl.metadata (9.4 kB)
Collecting cloudpickle>=1.2.0 (from gymnasium)
  Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.1.1-py3-none-any.whl (965 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m965.4/965.4 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cloudpickle-3.1.1-py3-none-any.whl (20 kB)
Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, cloudpickle, gymnasium
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [gymnasium]/3[0m [gymnasium]
[1A[2KSuccessfully installed cloudpickle-3.1.1 farama-notifications-0.0.4 gymnasium-1.1.1
Note: you may need to restart the kernel to use updated packages.


In [None]:
import gymnasium as gym
from gymnasium.spaces import Dict, MultiDiscrete, Tuple, Discrete
import numpy as np
import random

class CopsAndRobbersEnv(gym.Env):
    """
    Custom Environment for Cops and Robbers on an nxn grid or graph.
    currently from the pov of the cops
    """
    def __init__(self, graph, k, render_mode=None):
        super().__init__()

        # --- Inputs ---
        self.k = k
        self.graph = graph  #SageMath graph
        self.nodes = list(self.graph.vertices())
        self.n = math.sqrt(len(self.nodes))
        max_deg = int(self.get_max_deg() + 1)

        print(type(max_deg))

        # 1) Observation space: dict with
        #    - "cop_pos": flattened k (x,y) pairs
        #    - "robber_pos": single (x,y) pair
        self.observation_space = Dict({
            "cop_pos": MultiDiscrete([self.n, self.n] * self.k),   # [x1,y1, x2,y2, …, xk,yk]
            "robber_pos": MultiDiscrete([self.n, self.n])     # [xr,yr]
        })

        # 2) Action space: a tuple of k Discrete spaces, each of size (max_deg+1)
        #    (we’ll map 0…max_deg-1 to “move to the i‑th neighbor” and max_deg to “stay”)
        self.action_space = Tuple([Discrete(max_deg) for _ in range(self.k)])

        self.cop_pos = [(-1, -1)] * self.k #list of k tuples representing cop positions
        self.robber_pos = (-1, -1)
        self.render_mode = render_mode
        self.itr = 0

    def get_max_deg(self):
        max_degree = 0

        for vertex in self.graph.vertices():
            degree = self.graph.degree(vertex)
            if degree > max_degree:
                max_degree = degree
        return max_degree
    
    '''
    get the current observations, should be robber and cop positions i think
    '''
    def get_obs(self):
        # Flatten your list of k (x,y) tuples into a single list:
        flat_cops = [coord for pos in self.cop_pos for coord in pos]
        # Robber is a single (x,y)
        rob = list(self.robber_pos)
        return {
            "cop_pos": np.array(flat_cops, dtype=int),
            "robber_pos": np.array(rob, dtype=int)
        }
    
    def get_info(self):
        '''
        return info pertinent to reward
        Q: what do we care about?
        A: i think
            - robber curr LSD length / layer (jacobs idea)
            - #available squares for robber?
        maybe define reward fn first
        '''
        return {
            "distance": 1,
            "SD_length": 1,
            "robber_available:": 1
        }

    def reset(self, seed = None, options = None):       
        """
        Reset the environment to an initial state and return the initial observation.
        random start?
        """
        super().reset(seed=seed)
        
        #random for now, start doesnt matter
        for _ in range(self.k):
            self.cop_pos = random.sample(self.nodes, self.k)

        self.robber_pos = maximize_available(self.cop_pos, [], []) #pick maximizing available start for robber
        self.itr = 0
        
        # Return initial state
        observation = self.get_obs()
        info = self.get_info()
        return observation, info

    def step(self, action):
        """
        Take an action (cop move) and return next observation, reward, terminated, truncated, info.
        input is an action in the form of a k-tuple, each from 0 to max_deg
        recall for cop alpha, a value of i means move to the ith neighbors, a value of max_deg means stay
        """
        assert self.action_space.contains(action), "Invalid action!"
        self.itr += 1

        for i, act in enumerate(action):
            current_pos = self.cop_pos[i]
            neighbors = self.graph.neighbors(current_pos)

            if act < len(neighbors):
                self.cop_pos[i] = neighbors[act]  #move to ith neighbor
            else:
                pass  #stay in place

        #self.robber_pos = maximize_available(self.cop_pos, [], [], self.robber_pos)
        rand = random.sample(list(available_squares(self.cop_pos, self.robber_pos)), 1)
        self.robber_pos = rand[0]

        # Check terminal state
        terminated = self.robber_pos in self.cop_pos
        truncated = self.itr > self.n**2

        # Define reward
        reward = 1.0 if terminated else -0.001  #small penalty to encourage faster capture

        observation = self.get_obs()
        info = self.get_info()

        #self.render()
        return observation, reward, terminated, truncated, info

    def render(self):
        """
        visualize the environment
        do it with above defined fns
        """
        if self.render_mode == "human":
            print(f"Cops at {self.cop_pos}, Robber at {self.robber_pos}, iteration {self.itr}")
        
    def get_moves(self):
        return self.cop_pos, self.robber_pos

gym.register(
    id="gymnasium_env/CopsAndRobbers-v0",
    entry_point=CopsAndRobbersEnv,
)
gym.pprint_registry()

===== classic_control =====
Acrobot-v1             CartPole-v0            CartPole-v1
MountainCar-v0         MountainCarContinuous-v0 Pendulum-v1
===== phys2d =====
phys2d/CartPole-v0     phys2d/CartPole-v1     phys2d/Pendulum-v0
===== box2d =====
BipedalWalker-v3       BipedalWalkerHardcore-v3 CarRacing-v3
LunarLander-v3         LunarLanderContinuous-v3
===== toy_text =====
Blackjack-v1           CliffWalking-v0        FrozenLake-v1
FrozenLake8x8-v1       Taxi-v3
===== tabular =====
tabular/Blackjack-v0   tabular/CliffWalking-v0
===== mujoco =====
Ant-v2                 Ant-v3                 Ant-v4
Ant-v5                 HalfCheetah-v2         HalfCheetah-v3
HalfCheetah-v4         HalfCheetah-v5         Hopper-v2
Hopper-v3              Hopper-v4              Hopper-v5
Humanoid-v2            Humanoid-v3            Humanoid-v4
Humanoid-v5            HumanoidStandup-v2     HumanoidStandup-v4
HumanoidStandup-v5     InvertedDoublePendulum-v2 InvertedDoublePendulum-v4
InvertedDoublePendulu

In [12]:
env = gym.make("gymnasium_env/CopsAndRobbers-v0", graph=G, k=3, render_mode="human")
observation, info = env.reset()

episode_over = False
total_reward = 0.0
c_states = list()
r_states = list()

while not episode_over:
    action = env.action_space.sample() # agent policy that uses the observation and info
    observation, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    episode_over = terminated or truncated

    cop_state, robber_state = env.unwrapped.get_moves()
    #print(c_states)
    c_states.append(copy.deepcopy(cop_state))
    r_states.append(copy.deepcopy(robber_state))

print(total_reward)
print(r_states)
print(c_states)
env.close()

#BOTTLENECK: the maximize_available fn, just do random available robber move

  logger.warn(


<class 'int'>
-0.257000000000000
[(7, 10), (9, 8), (7, 10), (4, 10), (15, 10), (15, 8), (15, 8), (15, 5), (9, 5), (4, 5), (4, 6), (2, 6), (3, 6), (4, 5), (4, 11), (4, 14), (4, 13), (7, 13), (12, 8), (3, 8), (12, 8), (5, 1), (14, 10), (10, 14), (10, 7), (10, 9), (4, 15), (2, 13), (5, 13), (5, 11), (9, 15), (13, 11), (7, 11), (8, 12), (6, 12), (9, 12), (5, 12), (15, 2), (15, 3), (8, 10), (7, 11), (3, 7), (3, 10), (3, 0), (12, 9), (12, 5), (6, 5), (0, 11), (5, 11), (5, 0), (8, 3), (6, 1), (4, 1), (6, 3), (13, 10), (6, 10), (13, 10), (7, 4), (7, 6), (8, 7), (11, 7), (7, 7), (2, 7), (2, 15), (10, 7), (11, 7), (13, 7), (7, 7), (5, 5), (2, 2), (11, 11), (12, 12), (12, 6), (12, 9), (12, 6), (11, 7), (11, 11), (11, 6), (11, 8), (6, 13), (0, 7), (0, 8), (10, 8), (12, 6), (3, 6), (6, 3), (5, 4), (15, 14), (15, 12), (14, 11), (15, 12), (3, 12), (2, 11), (2, 4), (2, 6), (2, 1), (15, 1), (15, 0), (11, 0), (7, 4), (4, 1), (4, 12), (4, 2), (13, 11), (5, 11), (9, 15), (9, 15), (5, 11), (13, 3), (13, 7)

  logger.warn(


In [None]:
display_game(G, c_states, r_states)

In [None]:
%pip install stable-baselines3[extra]

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Wrap your env
env = make_vec_env(lambda: gym.make("gymnasium_env/CopsAndRobbers-v0", graph=G, k=3), n_envs=1)

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=100_000)

model.save("ppo_cops_and_robbers")

In [None]:
env = gym.make("gymnasium_env/CopsAndRobbers-v0", graph=G, k=3, render_mode="human")
model = PPO.load("ppo_cops_and_robbers")

obs, _ = env.reset()
done = False

while not done:
    action, _ = model.predict(obs)
    obs, reward, terminated, truncated, _ = env.step(action)
    done = terminated or truncated
