## Graph

In [114]:
class Node:
    def __init__(self, row, col):
        self.val = (row, col) # Value of node = its coordinates ((0, 0) = top-left/start, (m - 1, n - 1) = bottom-right/end).
        self.adj_list = set()

# create a list of edges for Game
class Graph:
    def __init__(self, num_rows, num_cols):
        self.num_rows = num_rows
        self.num_cols = num_cols
        self.nodes = self._create_nodes()
        
        self.edges = set()
        for row in range(self.num_rows): # Kinda like initialising a 2D matrix. From the nodes generated, add the edges to the set.
            for col in range(self.num_cols):
                node = self.nodes[row][col]

                adj_list = node.adj_list
                for other_node in adj_list:
                    self.edges.add((node, other_node))
                    self.edges.add((other_node, node))
        
    def init_start_and_end(self):
        self.start = self.nodes[0][0] # top-left
        self.end = self.nodes[self.num_rows - 1][self.num_cols - 1] # bottom-right
        
    def _create_nodes(self):
        nodes = [[Node(row, col) for col in range(self.num_cols)] for row in range(self.num_rows)] # create normal node for entire matrix
        for row in range(self.num_rows):
            for col in range(self.num_cols):
                node = nodes[row][col] # this stuff below is where we initialise the neighbours.
                if row > 0:
                    node.adj_list.add(nodes[row - 1][col])  # Upper neighbour.
                    nodes[row - 1][col].adj_list.add(node)
                if row < self.num_rows - 1:
                    node.adj_list.add(nodes[row + 1][col])  # Lower neighbour.
                    nodes[row + 1][col].adj_list.add(node)
                if col > 0:
                    node.adj_list.add(nodes[row][col - 1])  # Left neighbour.
                    nodes[row][col - 1].adj_list.add(node)
                if col < self.num_cols - 1:
                    node.adj_list.add(nodes[row][col + 1])  # Right neighbour.
                    nodes[row][col + 1].adj_list.add(node)
        return nodes
        
    def print_graph(self): # for debugging purposes
        for row in range(self.num_rows):
            for col in range(self.num_cols):
                print(self.nodes[row][col].val)
            print()
            
    def print_adjacencies(self): # for debugging purposes
        for row in range(self.num_rows):
            for col in range(self.num_cols):
                print((row, col), [node.val for node in self.nodes[row][col].adj_list])

In [115]:
g = Graph(3, 3)
g.init_start_and_end()

## Game

Rules:
1. s is in top-left (0, 0), t is bottom-right (m - 1, n - 1).
2. Fix-type player wants is to secure a path from s to t; to do this, the fix-type player secures an edge in the graph in each iteration.
3. Cut-type player wants to disconnect s and t; to do this, the cut-type player deletes an unsecured edge in the graph.
4. Game ends when there is a secured path from s to t (fix) or there are no paths between s and t (cut).

In [126]:
# fix: check for a path (dfs)
# cut: no more valid edges to choose
import random

class Game:
    def __init__(self, graph):
        self.graph = graph
        self.m = self.graph.num_rows
        self.n = self.graph.num_cols
        self.unsecured_count = (2 * self.m * self.n) - self.m - self.n # this is for the CUT player
        self.secured = [] # this is what the FIX player chooses; nodes
        self.secured_edges = [] # fix  
        self.removed_edges = [] # cut
        
        # these are the remaining unsecured edges    
        # i've used a set comprehension so it's easier to see
        # need to ensure both directions of edges are deleted when work is done (e.g. both ((0, 0), (1, 0)) and ((1, 0), (0, 0))
        self.remaining = {(node1.val, node2.val) for node1, node2 in self.graph.edges} 
        
        self.fix_win = False
        self.end = False
        
    def reset(self):
        self.unsecured_count = (2 * self.m * self.n) - self.m - self.n # this is for the CUT player
        self.secured = [] # this is what the FIX player chooses; nodes
        self.secured_edges = [] # fix  
        self.removed_edges = [] # cut
        self.remaining = {(node1.val, node2.val) for node1, node2 in self.graph.edges} 
        
        self.fix_win = False
        self.end = False
        
    # Plays step-by-step. This is what we'll use for "learning".
    def next_step_player(self):
        if self.unsecured_count > 0:
            if not self.end:
                # 1. CUT player's turn
                if len(self.remaining) == 0:
                    # No more valid edges to choose.
                    self.fix_win = False
                    self.end = True
                else:
                    edge_to_cut = self.choose_edge_to_cut()
                    self.cut(edge_to_cut)

            if not self.end:
                # 2. FIX player's turn
                if len(self.remaining) == 0:
                    # No more valid edges to choose.
                    self.fix_win = False
                    self.end = True
                else:
                    edge_to_fix = self.choose_edge_to_fix()
                    self.fix(edge_to_fix)
            
            if self.is_fix_path_complete():
                self.fix_win = True
                self.end = True
        else:
            self.end = True
        
    # Plays the entire thing.
    def play(self):
        while self.unsecured_count > 0:
            # 1. CUT player's turn
            if len(self.remaining) == 0:
                # No more valid edges to choose.
                self.fix_win = False
                self.end = True
                break
                
            edge_to_cut = self.choose_edge_to_cut()
            self.cut(edge_to_cut)
        
            # 2. FIX player's turn
            if len(self.remaining) == 0:
                # No more valid edges to choose.
                self.fix_win = False
                self.end = True
                break

            edge_to_fix = self.choose_edge_to_fix()
            self.fix(edge_to_fix)
            
            if self.is_fix_path_complete():
                self.fix_win = True
                self.end = True
                break
            
    def choose_edge_to_cut(self):
        # Need to implement some strategy here. Return as a tuple of coordinates.
        edge_to_cut = random.choice(list(self.remaining))
        return edge_to_cut

    def choose_edge_to_fix(self):
        # Need to implement some strategy here. Return as a tuple of coordinates.
        edge_to_fix = random.choice(list(self.remaining))
        return edge_to_fix

    def is_fix_path_complete(self): # this does BFS to check if there is a path from the start to the end.
        visited = set()
        stack = [(0, 0)]

        while stack:
            current_node = stack.pop()
            if current_node == (self.m - 1, self.n - 1):
                return True

            for i in range(len(self.secured) - 1):
                edge = (self.secured[i], self.secured[i + 1])
                reverse_edge = (self.secured[i + 1], self.secured[i])

                if (edge in self.secured_edges or reverse_edge in self.secured_edges) and current_node == self.secured[i]:
                    next_node = self.secured[i + 1]
                    if next_node not in visited:
                        visited.add(next_node)
                        stack.append(next_node)

        return False

    # 1. CUT player's function; removes unsecured edge in question (and its reverse).
    # Ideally we don't check if the edge is in self.remaining (we just assume it is).
    # But perhaps the choose function might fuck up.
    def cut(self, edge):
        # edge = ex: ((0, 0), (1,0))
        if edge in self.remaining:
            self.remaining.remove(edge)
            self.removed_edges.append(edge)
            self.unsecured_count -= 1
            
        # Also remove the reverse direction of the edge.
        reverse_edge = (edge[1], edge[0])
        if reverse_edge in self.remaining:
            self.remaining.remove(reverse_edge)
            self.unsecured_count -= 1

    def fix(self, edge):
        # edge = ex: ((0, 0), (1,0))
        if edge in self.remaining:
            self.remaining.remove(edge)
            self.secured.append(edge[0])
            self.secured.append(edge[1])
            self.secured_edges.append(edge)
            
        # Also remove the reverse direction of the edge.
        reverse_edge = (edge[1], edge[0])
        if reverse_edge in self.remaining:
            self.remaining.remove(reverse_edge)
            
    # Reward function
    def get_reward(self):
        if self.fix_win:
            # Positive reward when the FIX player wins
            reward = 1.0
        elif self.end:
            # Negative reward when the FIX player loses
            reward = -1.0
        else:
            # Intermediate reward for the ongoing game
            reward = 0.0            

        return reward
            
    def get_state(self):
        # Define the state representation based on the game state.
        secured_count = len(self.secured_edges)
        remaining_count = int(len(self.remaining) / 2) # Because reverse edges are here too.
        secured_edges = self.secured_edges
        deleted_edges = self.removed_edges
        remaining_edges = list(self.remaining) # Yet for this, we'll keep the reverse edges. Bit hypocritical, but fuck it.
        
        state = (secured_edges, deleted_edges, remaining_edges, secured_count, remaining_count)
    
        return state

In [153]:
for i in range(1000):
    g = Graph(4, 4)
    g.init_start_and_end()
    game = Game(g)
    game.play()
    if game.fix_win:
        break

In [165]:
game.secured_edges, game.remaining

([((0, 2), (1, 2)),
  ((1, 1), (1, 0)),
  ((1, 1), (1, 2)),
  ((1, 3), (2, 3)),
  ((3, 3), (2, 3)),
  ((1, 2), (1, 3)),
  ((2, 1), (1, 1)),
  ((2, 2), (1, 2)),
  ((0, 0), (1, 0))],
 {((0, 1), (1, 1)),
  ((0, 2), (0, 3)),
  ((0, 3), (0, 2)),
  ((1, 1), (0, 1)),
  ((2, 0), (3, 0)),
  ((2, 1), (2, 2)),
  ((2, 1), (3, 1)),
  ((2, 2), (2, 1)),
  ((3, 0), (2, 0)),
  ((3, 1), (2, 1)),
  ((3, 1), (3, 2)),
  ((3, 2), (3, 1))})

## Game (Updated to Include AI)

What I did here mate was renumber the graphs so it follows the following scheme (for a 3x3 graph, for example):

![3x3 graph](https://i.gyazo.com/3743bdc7923224e488a15dafaca0373c.png)

This had to be done so I could train the model better. I also started writing the outline of the model, agent, and trainer. They are not done, however. But, based on what I've seen/tested so far, it looks like the updated Graph and GameAI classes work as intended. Sound.

In [1]:
class Node:
    def __init__(self, val):
        self.val = val # Value of node = its position (left, right, zigzag, continue).
        self.adj_list = set()

# create a list of edges for Game
class Graph:
    def __init__(self, num_rows, num_cols):
        self.num_rows = num_rows
        self.num_cols = num_cols
        self.nodes_int = self._create_nodes()
        
        self.edges = set()
        for row in range(self.num_rows): # Kinda like initialising a 2D matrix. From the nodes generated, add the edges to the set.
            for col in range(self.num_cols):
                node = self.nodes_mat[row][col]
                
                adj_list = self.mapper[node.val].adj_list
                node = self.mapper[node.val]
                for other_node in adj_list:
                    self.edges.add((node, other_node))
                    self.edges.add((other_node, node))
        
    def init_start_and_end(self):
        self.start = 1 # top-left
        self.end = self.num_rows * self.num_cols # bottom-right
        
    def _create_nodes(self):
        nodes_int, i = [], 1
        nodes = [[Node((row, col)) for col in range(self.num_cols)] for row in range(self.num_rows)] 
        mapper = dict()
        for row in range(self.num_rows):
            for col in range(self.num_cols):
                node = Node(i) # Create the nodes, number 1 to (m * n).
                nodes_int.append(node)
                mapper[(row, col)] = node
                i += 1
        
        for row in range(self.num_rows):
            for col in range(self.num_cols):
                node = nodes[row][col] # this stuff below is where we initialise the neighbours.
                if row > 0:
                    node.adj_list.add(nodes[row - 1][col])  # Upper neighbour.
                    nodes[row - 1][col].adj_list.add(node)
                    
                    current_mapping = mapper[(row, col)]
                    nbr_mapping = mapper[(row - 1, col)]
                    
                    current_mapping.adj_list.add(nbr_mapping)
                    nbr_mapping.adj_list.add(current_mapping)
                    
                if row < self.num_rows - 1:
                    node.adj_list.add(nodes[row + 1][col])  # Lower neighbour.
                    nodes[row + 1][col].adj_list.add(node)
                    
                    current_mapping = mapper[(row, col)]
                    nbr_mapping = mapper[(row + 1, col)]
                    
                    current_mapping.adj_list.add(nbr_mapping)
                    nbr_mapping.adj_list.add(current_mapping)
                    
                if col > 0:
                    node.adj_list.add(nodes[row][col - 1])  # Left neighbour.
                    nodes[row][col - 1].adj_list.add(node)
                    
                    current_mapping = mapper[(row, col)]
                    nbr_mapping = mapper[(row, col - 1)]
                    
                    current_mapping.adj_list.add(nbr_mapping)
                    nbr_mapping.adj_list.add(current_mapping)
                    
                if col < self.num_cols - 1:
                    node.adj_list.add(nodes[row][col + 1])  # Right neighbour.
                    nodes[row][col + 1].adj_list.add(node)
                    
                    current_mapping = mapper[(row, col)]
                    nbr_mapping = mapper[(row, col + 1)]
                    
                    current_mapping.adj_list.add(nbr_mapping)
                    nbr_mapping.adj_list.add(current_mapping)
                
        self.nodes_mat = nodes
        self.mapper = mapper
                    
        return nodes_int
        
    def print_graph(self): # for debugging purposes
        print([node.val for row in self.nodes_mat for node in row])
        print()
        print([node.val for node in self.nodes_int])

In [4]:
import random
class GameAI:
    def __init__(self, graph):
        self.graph = graph
        self.m = self.graph.num_rows
        self.n = self.graph.num_cols
        
        self.node_mapping = dict()
        for i in range(1, (self.m * self.n) + 1):
            self.node_mapping[i] = self.graph.nodes_int[i - 1] # e.g. 1 is in index 0, 2 is index 1, etc.
        
        self.unsecured_count = (2 * self.m * self.n) - self.m - self.n # this is for the CUT player
        self.secured = [] # this is what the FIX player chooses; nodes
        self.secured_edges = [] # fix  
        self.removed_edges = [] # cut
        
        self.remaining = {(node1.val, node2.val) for node1, node2 in self.graph.edges} 
        self.fix_win = False
        self.end = False
        
    def reset(self): # Reset everything for the next training iteration.
        self.unsecured_count = (2 * self.m * self.n) - self.m - self.n 
        self.secured = []
        self.secured_edges = []
        self.removed_edges = []
        
        self.remaining = {(node1.val, node2.val) for node1, node2 in self.graph.edges} 
        self.fix_win = False
        self.end = False
        
    # This is what our AI will train against. Random shit.
    def choose_edge_to_cut(self):
        edge_to_cut = random.choice(list(self.remaining))
        return edge_to_cut    
    
    # Plays step-by-step. This is what we'll use for "learning".
    def next_step_player(self, chosen_edge):
        if self.unsecured_count > 0:
            if not self.end:
                # 1. CUT/bot player's turn.
                if len(self.remaining) == 0:
                    # No more valid edges to choose.
                    self.fix_win = False
                    self.end = True
                else:
                    edge_to_cut = self.choose_edge_to_cut()
                    self.cut(edge_to_cut)

            if not self.end:
                # 2. FIX player's turn: where the magic happens.
                if len(self.remaining) == 0:
                    # No more valid edges to choose.
                    self.fix_win = False
                    self.end = True
                else:
                    self.fix(chosen_edge)
            
            if self.is_fix_path_complete():
                self.fix_win = True
                self.end = True
        else:
            self.end = True

    # This is still fucked. Needs to look within current edges.
    def is_fix_path_complete(self): # This does BFS to check if there is a path from the start to the end.
        visited = set()
        stack = [1]

        while stack:
            current_node = stack.pop()
            if current_node == self.m * self.n: # e.g. 4x4, 16 is the bottom-right.
                return True

            if current_node not in visited:
                visited.add(current_node)
                adj_list = self.node_mapping[current_node].adj_list
                
                for nbr in adj_list:
                    edge = (current_node, nbr.val)
                    reverse_edge = (nbr.val, current_node)

                    if edge in self.secured_edges or reverse_edge in self.secured_edges:
                        if nbr.val not in visited:
                            stack.append(nbr.val)

        return False

    # 1. CUT player's function; removes unsecured edge in question (and its reverse).
    def cut(self, edge):
        # edge = ex: (1, 4)
        if edge in self.remaining:
            self.remaining.remove(edge)
            self.removed_edges.append(edge)
            self.unsecured_count -= 1
            
        # Also remove the reverse direction of the edge.
        reverse_edge = (edge[1], edge[0])
        if reverse_edge in self.remaining:
            self.remaining.remove(reverse_edge)
            self.unsecured_count -= 1

    # 2. FIX player's function; secures unsecured edge in question (and its reverse).
    def fix(self, edge):
        # edge = ex: (1, 4)
        if edge in self.remaining:
            self.remaining.remove(edge)
            self.secured.append(edge[0])
            self.secured.append(edge[1])
            self.secured_edges.append(edge)
            
        # Also remove the reverse direction of the edge.
        reverse_edge = (edge[1], edge[0])
        if reverse_edge in self.remaining:
            self.remaining.remove(reverse_edge)
            
    # Reward function
    def get_reward(self):
        if self.fix_win:
            # Positive reward when the FIX player wins
            reward = 1.0
        elif self.end:
            # Negative reward when the FIX player loses
            reward = -1.0
        else:
            # Intermediate reward for the ongoing game
            reward = 0.0            

        return reward
            
    def get_state(self):
        # Define the state representation based on the game state.
        secured_count = len(self.secured_edges)
        remaining_count = int(len(self.remaining) / 2) # Because reverse edges are here too.
        secured_edges = self.secured_edges
        deleted_edges = self.removed_edges
        remaining_edges = list(self.remaining) # Yet for this, we'll keep the reverse edges. Bit hypocritical, but fuck it.
        
        state = (secured_edges, deleted_edges, remaining_edges, secured_count, remaining_count)
    
        return state
    
    # This is still here purely for debugging purposes.
    def play(self):
        while self.unsecured_count > 0:
            # 1. CUT player's turn
            if len(self.remaining) == 0:
                # No more valid edges to choose.
                self.fix_win = False
                self.end = True
                break
                
            edge_to_cut = self.choose_edge_to_cut()
            self.cut(edge_to_cut)
        
            # 2. FIX player's turn
            if len(self.remaining) == 0:
                # No more valid edges to choose.
                self.fix_win = False
                self.end = True
                break

            edge_to_fix = self.choose_edge_to_fix()
            self.fix(edge_to_fix)
            
            if self.is_fix_path_complete():
                self.fix_win = True
                self.end = True
                break
            elif self.unsecured_count == 0:
                self.fix_win = False
                self.end = True

    def choose_edge_to_fix(self):
        # Need to implement some strategy here. Return as a tuple of coordinates.
        edge_to_fix = random.choice(list(self.remaining))
        return edge_to_fix

In [7]:
g = Graph(3, 3)
g.init_start_and_end()
gameAI_test = GameAI(g)

for i in range(100):
    gameAI_test.play()
    if gameAI_test.fix_win:
        break
        
    gameAI_test.reset()

In [8]:
gameAI_test.secured_edges, gameAI_test.removed_edges, gameAI_test.fix_win

([(2, 1), (7, 8), (9, 6), (4, 5), (6, 3), (2, 3)],
 [(8, 9), (5, 2), (1, 4), (6, 5), (8, 5), (4, 7)],
 True)

## Agent

In [45]:
def convert_to_adj_matrix(edges, num_nodes):
    nodes = set()
    for edge in edges:
        nodes.add(edge[0])
        nodes.add(edge[1])

    adj_matrix = np.zeros((num_nodes + 1, num_nodes + 1)) # 0th col and 0th row will just be to pad.

    # Populate the adjacency matrix
    for edge in edges:
        adj_matrix[edge[0]][edge[1]] = 1
        
    return adj_matrix

def find_max_number(lists_of_tuples):
    max_number = float('-inf')

    for list_of_tuples in lists_of_tuples:
        for tup in list_of_tuples:
            numbers = [x for x in tup if isinstance(x, (int, float))]
            if numbers:
                current_max = max(numbers)
                if current_max > max_number:
                    max_number = current_max

    return max_number

In [457]:
# https://www.youtube.com/watch?v=L8ypSXwyBds
import random
from collections import deque

LR = 0.001
MAX_MEMORY = 1_000_000
BATCH_SIZE = 1000

class Agent:
    def __init__(self, state_size, game):
        self.n_games = 0 # Iteration number
        self.epsilon = 0 # Randomness for epsilon-greedy.
        self.gamma = 0.9 # "Discount rate"
        self.memory = deque(maxlen = MAX_MEMORY) # popleft()
        
        # Should set secured edges to -1 so agent doesn't select them. ShannonModel needs a function.
        self.model = ShannonModel(state_size, 302, (2 * game.m * game.n) - game.m - game.n)
        self.trainer = Trainer(self.model, lr = LR, gamma = self.gamma)
        
    def get_state(self, game):
        # (secured_edges, deleted_edges, remaining_edges, secured_count, remaining_count)
        state = game.get_state() # Could probably clean this up a bit and turn it into tensors in here.
        
        num_nodes = find_max_number([state[0], state[1], state[2]])
        
        secured_edges = convert_to_adj_matrix(state[0], num_nodes)
        deleted_edges = convert_to_adj_matrix(state[1], num_nodes)
        remaining_edges = convert_to_adj_matrix(state[2], num_nodes)
        secured_count, remaining_count = state[3], state[4]
        
        return np.array((secured_edges, deleted_edges, remaining_edges, secured_count, remaining_count), dtype = object)
    
    def remember(self, state, action, reward, next_state, done): # Stores this shit into the deque so it can be used for training later.
        self.memory.append((state, action, reward, next_state, done)) # popleft if MAX_MEMORY is reached

    def train_short_memory(self, state, action, reward, next_state, done):
        self.trainer.train_step(state, action, reward, next_state, done) # Pretty much the same shit as remember, but trainer uses data.
        
    def train_long_memory(self):
        if len(self.memory) > BATCH_SIZE:
            mini_sample = random.sample(self.memory, BATCH_SIZE)
        else:
            mini_sample = self.memory

        states, actions, rewards, next_states, dones = zip(*mini_sample) # From remember/memory
        self.trainer.train_step(states, actions, rewards, next_states, dones)
        
    def get_action(self, state):
        # Exploration / exploitation
        self.epsilon = 80 - self.n_games # Hardcoded, can change this shit
        
        remaining_edges = state[2]
        
        if random.randint(0, 200) < self.epsilon: # This works.
            indices = np.where(remaining_edges == 1)
            random_index = random.choice([i for i in range(len(indices[0]))]) 
            row_index, col_index = indices[0][random_index], indices[1][random_index]
            final_edge = (row_index, col_index)
        else: # TODO
            secured_edges, deleted_edges, remaining_edges, secured_count, remaining_count = (torch.tensor(state[0]).flatten(), 
                                                                                             torch.tensor(state[1]).flatten(), 
                                                                                             torch.tensor(state[2]).flatten(), 
                                                                                             torch.tensor([state[3]]), 
                                                                                             torch.tensor([state[4]]))
            
            # Need to make secured_count and remaining_count of dimension 1 as well.
            # secured_count = torch.unsqueeze(secured_count, dim = 0)
            # remaining_count = torch.unsqueeze(remaining_count, dim = 0)
            
            state0 = np.concatenate([secured_edges, deleted_edges, remaining_edges, secured_count, remaining_count]).tolist()
            state0 = torch.tensor(state0)
            
            prediction = self.model(state0)
            print(prediction)
            final_edge = torch.argmax(prediction).item()

        return final_edge

In [461]:
g = Graph(3, 3)
g.init_start_and_end()
gameAI = GameAI(g)
agent = Agent(302, gameAI)
boner = agent.get_action(agent.get_state(gameAI))
boner

tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  

10

This is where I have left off my work. I did not finish the agent yet bud.

In [341]:
remaining_edges = np.array([
    [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
    [0., 0., 1., 0., 1., 0., 0., 0., 0., 0.],
    [0., 1., 0., 1., 0., 1., 0., 0., 0., 0.],
    [0., 0., 1., 0., 0., 0., 1., 0., 0., 0.],
    [0., 1., 0., 0., 0., 1., 0., 1., 0., 0.],
    [0., 0., 1., 0., 1., 0., 1., 0., 1., 0.],
    [0., 0., 0., 1., 0., 1., 0., 0., 0., 1.],
    [0., 0., 0., 0., 1., 0., 0., 0., 1., 0.],
    [0., 0., 0., 0., 0., 1., 0., 1., 0., 1.],
    [0., 0., 0., 0., 0., 0., 1., 0., 1., 0.]
])
indices = np.where(remaining_edges == 1)
random_index = random.choice([i for i in range(len(indices[0]))]) 
row_index, col_index = indices[0][random_index], indices[1][random_index]
final_edge = remaining_edges[row_index, col_index]
final_edge, (row_index, col_index)

(1.0, (1, 2))

In [312]:
indices[0]

array([1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8,
       9, 9])

## Model

In [13]:
# 1. model.py
# 2. agent.py
# model is the FFNN, agent is what trains the model
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import os

In [447]:
# This is the Feedforward Neural Network.
class ShannonModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

## Trainer

In [15]:
# This trains the ShannonModel() initialised above.
class Trainer: 
    def __init__(self, model, lr, gamma):
        self.lr = lr # Learning Rate
        self.gamma = gamma # https://ai.stackexchange.com/questions/8100/what-is-the-purpose-of-the-gamma-parameter-in-svms
        self.model = model # ShannonModel()
        self.optimizer = optim.Adam(model.parameters(), lr = self.lr) # Adam Algorithm (some shit idek)
        self.criterion = nn.MSELoss() # Standard MSE

    def train_step(self, state, action, reward, next_state, done): # Where the magic happens
        # Needs to be implemented.
        pass