## Graph

In [21]:
class Node:
    """A single cell of the grid graph.

    Attributes:
        val: the cell's (row, col) coordinates; (0, 0) is the top-left/start
            cell and (m - 1, n - 1) is the bottom-right/end cell.
        adj_list: set of neighbouring Node objects (filled in by the graph).
    """

    def __init__(self, row, col):
        # Neighbours are attached later, so every node starts out isolated.
        self.adj_list = set()
        # The coordinates double as the node's value.
        self.val = row, col

# create a list of edges for Game
class Graph:
    """Rectangular grid graph of `num_rows` x `num_cols` Node objects.

    Every node is linked to its upper, lower, left and right neighbours
    (where they exist).  `edges` holds each link in both orientations as
    (node, other_node) tuples of Node objects.
    """

    def __init__(self, num_rows, num_cols):
        """Build the node matrix and collect every directed edge from it."""
        self.num_rows = num_rows
        self.num_cols = num_cols
        self.nodes = self._create_nodes()

        # Walk the matrix once and record each adjacency in both directions.
        self.edges = set()
        for grid_row in self.nodes:
            for node in grid_row:
                for neighbour in node.adj_list:
                    self.edges.update(((node, neighbour), (neighbour, node)))

    def init_start_and_end(self):
        """Store the top-left node as `start` and the bottom-right node as `end`."""
        last_row, last_col = self.num_rows - 1, self.num_cols - 1
        self.start = self.nodes[0][0]
        self.end = self.nodes[last_row][last_col]

    def _create_nodes(self):
        """Return the matrix of nodes with all grid adjacencies wired up."""
        grid = [
            [Node(row, col) for col in range(self.num_cols)]
            for row in range(self.num_rows)
        ]
        # Upper, lower, left, right -- in that order.
        offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))
        for row, grid_row in enumerate(grid):
            for col, node in enumerate(grid_row):
                for d_row, d_col in offsets:
                    n_row, n_col = row + d_row, col + d_col
                    inside = 0 <= n_row < self.num_rows and 0 <= n_col < self.num_cols
                    if inside:
                        neighbour = grid[n_row][n_col]
                        # Link both ways so the adjacency is symmetric.
                        node.adj_list.add(neighbour)
                        neighbour.adj_list.add(node)
        return grid

    def print_graph(self):
        """Debug helper: print every node value, with a blank line after each row."""
        for grid_row in self.nodes:
            for node in grid_row:
                print(node.val)
            print()

    def print_adjacencies(self):
        """Debug helper: print each coordinate followed by its neighbours' values."""
        for row, grid_row in enumerate(self.nodes):
            for col, node in enumerate(grid_row):
                print((row, col), [neighbour.val for neighbour in node.adj_list])

In [73]:
# Build a 3x3 grid graph and record its start (top-left) and end (bottom-right) nodes.
g = Graph(3, 3)
g.init_start_and_end()

## Game

Rules:
1. s is in top-left (0, 0), t is bottom-right (m - 1, n - 1).
2. The fix-type player wants to secure a path from s to t; to do this, the fix-type player secures one edge of the graph on each turn.
3. Cut-type player wants to disconnect s and t; to do this, the cut-type player deletes an unsecured edge in the graph.
4. Game ends when there is a secured path from s to t (fix) or there are no paths between s and t (cut).

#### The problem with the code below:
1. choose_edge_to_cut() and choose_edge_to_fix() are GPT-generated placeholders that simply pick a random remaining edge; they are probably poor and need a real strategy.
2. play()'s logic is probably not correct.

In [85]:
# fix: check for a path (dfs)
# cut: no more valid edges to choose
import random

class Game:
    """Cut/fix edge game played on a grid graph.

    The FIX player secures edges, trying to connect the top-left cell (0, 0)
    to the bottom-right cell (m - 1, n - 1); the CUT player deletes unsecured
    edges.  Edges are pairs of coordinate tuples, e.g. ((0, 0), (1, 0)).

    Args:
        graph: object exposing `num_rows`, `num_cols` and `edges`, where
            `edges` is an iterable of (node1, node2) pairs whose items have a
            `.val` coordinate tuple.
    """

    def __init__(self, graph):
        self.graph = graph
        self.m = self.graph.num_rows
        self.n = self.graph.num_cols
        # Number of undirected edges that are neither secured nor cut yet.
        # An m x n grid starts with 2mn - m - n of them.
        self.unsecured_count = (2 * self.m * self.n) - self.m - self.n
        self.secured = []  # endpoints of the edges FIX has chosen, two per edge
        self.secured_edges = []  # edges chosen by FIX, in the orientation given
        self.removed_edges = []  # edges chosen by CUT, in the orientation given

        # Edges still available to either player.  Both orientations of every
        # edge are stored, e.g. ((0, 0), (1, 0)) and ((1, 0), (0, 0)), and both
        # are dropped together when the edge is cut or secured.
        self.remaining = {(node1.val, node2.val) for node1, node2 in self.graph.edges}

        self.fix_win = False
        self.end = False

    def next_step_player(self):
        """Play one round (CUT moves, then FIX moves); used for step-wise learning.

        Sets `end` once the game is over and `fix_win` when FIX has secured a
        path.  Calling it on a finished game does nothing.
        """
        if self.end:
            return

        if self.unsecured_count <= 0:
            # No edge is left to play and FIX has not been declared the
            # winner, so the game is over; `fix_win` is left untouched.
            self.end = True
            return

        # 1. CUT player's turn.
        if not self.remaining:
            self._end_without_path()
            return
        self.cut(self.choose_edge_to_cut())

        # 2. FIX player's turn.
        if not self.remaining:
            self._end_without_path()
            return
        self.fix(self.choose_edge_to_fix())

        if self.is_fix_path_complete():
            self.fix_win = True
            self.end = True

    def play(self):
        """Play rounds until the game is over.

        Delegates to `next_step_player` so both entry points share one set of
        rules, and so `end` is always True when this method returns.
        """
        while not self.end:
            self.next_step_player()

    def _end_without_path(self):
        """Finish the game as a loss for FIX (no valid edge left to choose)."""
        self.fix_win = False
        self.end = True

    def choose_edge_to_cut(self):
        """Return the edge CUT plays next, as a tuple of coordinates.

        Placeholder strategy: a uniformly random remaining edge.
        """
        return random.choice(list(self.remaining))

    def choose_edge_to_fix(self):
        """Return the edge FIX plays next, as a tuple of coordinates.

        Placeholder strategy: a uniformly random remaining edge.
        """
        return random.choice(list(self.remaining))

    def is_fix_path_complete(self):
        """Return True if secured edges connect (0, 0) to (m - 1, n - 1).

        Runs a depth-first search over `secured_edges`, treating every secured
        edge as undirected regardless of the orientation it was stored in.
        """
        start = (0, 0)
        goal = (self.m - 1, self.n - 1)

        # Undirected adjacency map built from the secured edges.
        neighbours = {}
        for first, second in self.secured_edges:
            neighbours.setdefault(first, set()).add(second)
            neighbours.setdefault(second, set()).add(first)

        visited = {start}
        stack = [start]
        while stack:
            current_node = stack.pop()
            if current_node == goal:
                return True
            for next_node in neighbours.get(current_node, ()):
                if next_node not in visited:
                    visited.add(next_node)
                    stack.append(next_node)

        return False

    def _claim(self, edge):
        """Take `edge` out of play in both orientations.

        Decrements `unsecured_count` once if either orientation was still
        available.  Returns True only when `edge`, in the orientation given,
        was available.
        """
        reverse_edge = (edge[1], edge[0])
        was_available = edge in self.remaining
        if was_available or reverse_edge in self.remaining:
            self.remaining.discard(edge)
            self.remaining.discard(reverse_edge)
            self.unsecured_count -= 1
        return was_available

    def cut(self, edge):
        """CUT player's move: delete an unsecured edge (and its reverse).

        Args:
            edge: e.g. ((0, 0), (1, 0)).  An edge that is no longer available
                is ignored, in case a choose function hands back a stale edge.
        """
        if self._claim(edge):
            self.removed_edges.append(edge)

    def fix(self, edge):
        """FIX player's move: secure an unsecured edge (and its reverse).

        Args:
            edge: e.g. ((0, 0), (1, 0)).  An edge that is no longer available
                is not recorded as secured.
        """
        if self._claim(edge):
            self.secured.extend((edge[0], edge[1]))
            self.secured_edges.append(edge)

    def get_reward(self):
        """Return 1.0 if FIX has won, -1.0 if the game ended otherwise, else 0.0."""
        if self.fix_win:
            return 1.0
        if self.end:
            return -1.0
        return 0.0

    def get_state(self):
        """Return a snapshot of the game state.

        Returns:
            Tuple (secured_edges, deleted_edges, remaining_edges,
            secured_count, remaining_count).  The three edge lists are copies,
            so a stored state is not changed by later moves.
            `remaining_count` counts directed entries of `remaining`.
        """
        secured_edges = list(self.secured_edges)
        deleted_edges = list(self.removed_edges)
        remaining_edges = list(self.remaining)
        return (
            secured_edges,
            deleted_edges,
            remaining_edges,
            len(secured_edges),
            len(remaining_edges),
        )

## Train

In [101]:
# Experiment settings, gathered here so they are easy to tune.
SEED = 0
NUM_EPISODES = 1000
GRID_ROWS, GRID_COLS = 2, 2

# Both players pick edges with `random.choice`, so seed the generator to make
# the recorded episodes reproducible from one run to the next.
random.seed(SEED)

g = Graph(GRID_ROWS, GRID_COLS)
g.init_start_and_end()

# One entry per episode; each entry lists the per-step states / rewards.
overall_states = []
overall_rewards = []
for i in range(NUM_EPISODES):
    # Every episode starts from a fresh game on the same graph.
    game = Game(g)

    rewards = []
    states = []
    while not game.end:
        game.next_step_player()
        reward = game.get_reward()
        state = game.get_state()

        rewards.append(reward)
        states.append(state)

    overall_states.append(states)
    overall_rewards.append(rewards)