# SIT320 Advanced Algorithms
## Module 12 - MDP and Reinforcement Learning

---

<img src="uml-class-diagram.png" width="1000" height="800">

---

### Board Class

In [None]:
from abc import ABC, abstractmethod

class Board(ABC):
    """Abstract interface that every concrete game board must implement."""

    def __init__(self, boardDimensions):
        """Store the board size on the instance.
        Args: boardDimensions: the size value kept as ``self.boardDimensions``
        """
        self.boardDimensions = boardDimensions

    @abstractmethod
    def createBoard(self):
        """Subclass hook: build the initial board."""

    @abstractmethod
    def printBoard(self):
        """Subclass hook: display the board."""

    @abstractmethod
    def setCellState(self, position, state):
        """Subclass hook: store ``state`` in the cell at ``position``."""

    @abstractmethod
    def getCellState(self, position):
        """Subclass hook: return the state of the cell at ``position``."""

    @abstractmethod
    def getBoardState(self):
        """Subclass hook: return the state of the whole board."""

    @abstractmethod
    def getBoardDimensions(self):
        """Subclass hook: return the board dimensions."""

    @abstractmethod
    def spaceIsFree(self, position):
        """Subclass hook: tell whether the cell at ``position`` is free."""

    

In [None]:
import itertools
class TicTacToeBoard(Board):
    """Square tic-tac-toe board.

    The playable board is a dictionary keyed 1..boardDimensions**2 (row by row)
    whose values are 'X', 'O' or ' '. The ``get_all_states`` / ``get_actions`` /
    ``get_new_state`` helpers work on a separate grid representation: a sequence
    of rows holding 1 (X), -1 (O) or 0 (empty).
    """

    def __init__(self, boardDimensions):
        """Create a board of dimensions boardDimensions x boardDimensions
        Args: boardDimensions (int): the dimensions of the board
        """
        super().__init__(boardDimensions)
        self.createBoard()

    def createBoard(self):
        """Create an empty board of dimensions boardDimensions x boardDimensions"""
        cell_count = self.getBoardDimensions() ** 2
        self.boardState = {position: ' ' for position in range(1, cell_count + 1)}

    def printBoard(self):
        """Print the board row by row with a dashed line between rows."""
        size = self.getBoardDimensions()
        separator = '-' * (size * 2 - 1)
        for row_index in range(size):
            row = [self.boardState[row_index * size + col + 1] for col in range(size)]
            print('|'.join(row))
            # The separator depends on the row's position, not on its contents:
            # it goes between rows and is omitted after the last one.
            if row_index < size - 1:
                print(separator)
        print('\n')

    def setCellState(self, position, state):
        """Set the state of a cell on the board
        Args:
            position (int): the position of the cell
            state (str): the state of the cell
        """
        self.boardState[position] = state

    def getCellState(self, position):
        """Get the state of a cell on the board
        Args:
            position (int): the position of the cell
        Returns: the state of the cell
        """
        return self.boardState[position]

    def getBoardState(self):
        """Get the state of the board
        Returns: The board's own dictionary (not a copy) with keys
            1 to boardDimensions**2 and values 'X', 'O' or ' '
        """
        return self.boardState

    @staticmethod
    def _has_winner(board):
        """Tell whether any row, column or diagonal of a grid is filled by one player.
        Args: board: square grid (sequence of rows) holding 1, -1 or 0
        Returns: bool: True if some line holds the same non-zero mark in every cell
        """
        n = len(board)
        lines = [list(row) for row in board]
        lines.extend([board[r][c] for r in range(n)] for c in range(n))
        lines.append([board[k][k] for k in range(n)])
        lines.append([board[k][n - 1 - k] for k in range(n)])
        return any(bool(line) and line[0] != 0 and len(set(line)) == 1 for line in lines)

    def get_all_states(self, n):
        """Return all board configurations reachable in legal play, including
        equivalent rotations and reflections.
        Play starts from the empty grid with player 1 (X) and alternates with
        player -1 (O). A grid that contains a completed line is included in the
        result but is not expanded any further.
        Args: n (int): length of side of board
        Returns: list of distinct states, each a tuple of row tuples
        """
        empty = tuple((0,) * n for _ in range(n))
        states = [empty]
        seen = {empty}
        # Each stack entry is (grid, player to move). The player to move is fixed
        # by the number of marks on the grid, so a grid needs expanding only once.
        stack = [(empty, 1)]
        while stack:
            board, player = stack.pop()
            for i, j in itertools.product(range(n), range(n)):
                if board[i][j] != 0:
                    continue
                successor = self.get_new_state(board, (i, j), player)
                if successor in seen:
                    continue
                seen.add(successor)
                states.append(successor)
                if not self._has_winner(successor):
                    stack.append((successor, -player))
        return states

    def get_actions(self, state):
        """List the empty cells of a grid.
        Args: state: current board configuration (square grid of 1, -1 or 0)
        Returns: list of (row, column) tuples for every cell holding 0
        """
        n = len(state)
        return [
            (i, j)
            for i, j in itertools.product(range(n), range(n))
            if state[i][j] == 0
        ]

    def get_new_state(self, state, action, player=1):
        """Return the grid obtained by placing a mark on ``state``.
        Args:
            state: current board configuration (square grid of 1, -1 or 0)
            action (tuple): (row, column) of the cell to fill
            player (int): mark to place; defaults to 1 (X)
        Returns: new board configuration as a tuple of row tuples
        """
        i, j = action
        # Rows may be tuples, so copy them into lists before assigning.
        new_state = [list(row) for row in state]
        new_state[i][j] = player
        return tuple(map(tuple, new_state))

    def getBoardDimensions(self):
        """Get the dimensions of the board
        Returns: An integer representing the dimensions of the board
        """
        return self.boardDimensions

    def spaceIsFree(self, position):
        """Tell whether a cell is still empty.
        Args: position (int): the position of the cell
        Returns: bool: True if the cell holds ' ', False otherwise
        """
        return self.boardState[position] == ' '

---

### Game Logic

In [None]:
from abc import ABC, abstractmethod

class GameLogic(ABC):
    """Abstract interface for the win/draw rules of a board game."""

    def __init__(self, boardGame):
        """Create a game logic object for the board game
        Args: boardGame: the board object the rules are evaluated on
        """
        self.boardGame = boardGame

    @abstractmethod
    def chkForWin(self):
        """Subclass hook: tell whether any player has won."""

    @abstractmethod
    def chkForDraw(self):
        """Subclass hook: tell whether the game is a draw."""

    @abstractmethod
    def chkMarkForWin(self, letter):
        """Subclass hook: tell whether the player using ``letter`` has won."""

In [None]:
class TicTacToeGameLogic(GameLogic):
    """Win and draw detection for a square tic-tac-toe board."""

    def __init__(self, boardGame):
        """Create a game logic object for the board game
        Args: boardGame (Board): the board game. Must be a subclass of Board
        """
        super().__init__(boardGame)

    def _lines(self):
        """Yield the cell values of every row, every column and both diagonals.
        Cells are read from the board's state dictionary, whose keys run from 1
        row by row.
        """
        cells = self.boardGame.getBoardState()
        size = self.boardGame.getBoardDimensions()
        span = range(size)
        for r in span:
            yield [cells[r * size + c + 1] for c in span]
        for c in span:
            yield [cells[r * size + c + 1] for r in span]
        yield [cells[k * size + k + 1] for k in span]
        yield [cells[k * size + (size - k - 1) + 1] for k in span]

    def chkForDraw(self):
        """Check if the game is a draw.
        Returns: bool: True if no cell on the board is ' ', False otherwise.
        """
        return ' ' not in self.boardGame.getBoardState().values()

    def chkForWin(self):
        """Check if any player has won.
        Returns: bool: True if some line holds one identical non-blank mark, False otherwise.
        """
        for line in self._lines():
            if len(set(line)) == 1 and line[0] != ' ':
                return True
        return False

    def chkMarkForWin(self, letter):
        """Check if the player with the specified letter has won.
        Args: letter (str): Letter of the player to check for win.
        Returns: bool: True if some line is filled entirely with ``letter``, False otherwise.
        """
        for line in self._lines():
            if len(set(line)) == 1 and line[0] == letter:
                return True
        return False

---

### Player Class

In [None]:
from abc import ABC, abstractmethod

class Player(ABC):
    """Abstract participant in the game: a mark plus a move-selection algorithm."""

    def __init__(self, letter, algorithm):
        """Create a player object
        Args: letter (str): the letter of the player; expected to be 'X' or 'O' (not validated here)
        Args: algorithm (Algorithm): the algorithm used by the player to pick moves
        """
        self.letter, self.algorithm = letter, algorithm

    @abstractmethod
    def makeMove(self, boardGame):
        """Subclass hook: play one move on ``boardGame``."""

In [None]:
class HumanPlayer(Player):
    """Player whose moves come from its algorithm (an input-reading one in this file)."""

    def __init__(self, letter, algorithm):
        """Create a human player.
        Args: letter (str): the mark this player places on the board.
        Args: algorithm (Algorithm): object whose ``bestMove`` supplies the position.
        """
        super().__init__(letter, algorithm)

    def makeMove(self, boardGame):
        """Make a move with the position chosen by the player's algorithm.
        Args: boardGame (Board): The board game object.
        The algorithm is asked for a position and the cell at that position is
        set to the player's letter.
        """
        chosen = self.algorithm.bestMove(boardGame, self.letter)
        boardGame.setCellState(chosen, self.letter)

In [None]:
class ComputerPlayer(Player):
    """Player whose moves are computed by a search algorithm."""

    def __init__(self, letter, algorithm):
        """Create a computer player.
        Args: letter (str): the mark this player places on the board.
        Args: algorithm (Algorithm): object whose ``bestMove`` supplies the position.
        """
        super().__init__(letter, algorithm)

    def makeMove(self, boardGame):
        """Make a move by using the algorithm to find the best move.
        Args: boardGame (Board): The board game object.
        The cell at the returned position is set to the player's letter.
        """
        target = self.algorithm.bestMove(boardGame, self.letter)
        boardGame.setCellState(target, self.letter)

---

### Algorithms

In [None]:
class Algorithm(ABC):
    """Abstract move-selection strategy bound to a board."""

    def __init__(self, boardGame):
        """Create an algorithm object for the board game
        Args: boardGame (Board): the board game. Must be a subclass of Board
        """
        self.boardGame = boardGame

    @abstractmethod
    def bestMove(self, boardGame, letter):
        """Subclass hook: return the position to play for ``letter`` on ``boardGame``."""


### Minimax

In [None]:
class Minimax(Algorithm):
    """Depth-limited minimax search for tic-tac-toe."""

    def __init__(self, boardGame):
        """Create a minimax algorithm object for the board game
        Args: boardGame (Board): the board game. Must be a subclass of Board
        """
        super().__init__(boardGame)

    def bestMove(self, boardGame, letter):
        """Find the best move for the computer player.
        Every free cell is tried in turn on the board's own state dictionary
        (and restored afterwards) and scored with ``minimax``.
        Args: boardGame (Board): The board game object
        Args: letter (str): The letter of the computer player.
        Returns: int: The position of the best move, or 0 if no cell is free."""
        boardState = boardGame.getBoardState()
        # Evaluate terminal positions on the board that is actually being
        # simulated, and build the rules object once instead of once per node.
        gameLogic = TicTacToeGameLogic(boardGame)
        bestScore = -1000
        bestMove = 0
        for key in boardState:
            if boardState[key] != ' ':
                continue
            boardState[key] = letter
            score = self.minimax(boardState, 0, False, letter, gameLogic=gameLogic)
            boardState[key] = ' '
            if score > bestScore:
                bestScore = score
                bestMove = key
        return bestMove

    def minimax(self, boardState, depth, isMaximizing, letter, maxDepth=5, gameLogic=None):
        """Find the best score for the computer player.
        Scores are 1 for a win of ``letter``, -1 for a win of the opponent and
        0 for a draw or when the depth limit is reached.
        Args: boardState (dict): The board state. It is modified in place during
            the search and restored before returning; the win/draw checks read
            the board behind ``gameLogic``, so this is assumed to be that
            board's own state dictionary.
        Args: depth (int): The depth of the tree.
        Args: isMaximizing (bool): Whether the player is maximizing or not.
        Args: letter (str): The letter of the computer player.
        Args: maxDepth (int): The maximum depth of the tree.
        Args: gameLogic (GameLogic): Rules object used for the win/draw checks.
            Defaults to one built on ``self.boardGame``.
        Returns: int: The best score for the computer player."""
        if gameLogic is None:
            gameLogic = TicTacToeGameLogic(self.boardGame)
        opponentLetter = 'O' if letter == 'X' else 'X'
        if gameLogic.chkMarkForWin(letter):
            return 1
        if gameLogic.chkMarkForWin(opponentLetter):
            return -1
        if gameLogic.chkForDraw() or depth >= maxDepth:
            return 0

        mark = letter if isMaximizing else opponentLetter
        choose = max if isMaximizing else min
        bestScore = -1000 if isMaximizing else 1000
        for key in boardState:
            if boardState[key] != ' ':
                continue
            boardState[key] = mark
            # maxDepth and gameLogic are forwarded so that a caller-supplied
            # limit applies to the whole tree, not only to the first level.
            score = self.minimax(boardState, depth + 1, not isMaximizing, letter, maxDepth, gameLogic)
            boardState[key] = ' '
            bestScore = choose(bestScore, score)
        return bestScore

### Minimax with Alpha-Beta Pruning

In [None]:
class MinimaxAlphaBeta(Algorithm):
    """Depth-limited minimax search with alpha-beta pruning for tic-tac-toe."""

    def __init__(self, boardGame):
        """Create an alpha-beta minimax algorithm object for the board game
        Args: boardGame (Board): the board game. Must be a subclass of Board
        """
        super().__init__(boardGame)

    def bestMove(self, boardGame, letter):
        """Find the best move for the computer player.
        Every free cell is tried in turn on the board's own state dictionary
        (and restored afterwards) and scored with ``minimax``.
        Args: boardGame (Board): The board game object
        Args: letter (str): The letter of the computer player.
        Returns: int: The position of the best move, or 0 if no cell is free."""
        boardState = boardGame.getBoardState()
        # Evaluate terminal positions on the board that is actually being
        # simulated, and build the rules object once instead of once per node.
        gameLogic = TicTacToeGameLogic(boardGame)
        bestScore = -1000
        bestMove = 0
        for key in boardState:
            if boardState[key] != ' ':
                continue
            boardState[key] = letter
            score = self.minimax(boardState, 0, False, letter, gameLogic=gameLogic)
            boardState[key] = ' '
            if score > bestScore:
                bestScore = score
                bestMove = key
        return bestMove

    def minimax(self, boardState, depth, isMaximizing, letter, alpha=-1000, beta=1000, maxDepth=5, gameLogic=None):
        """Find the best score for the computer player.
        Scores are 1 for a win of ``letter``, -1 for a win of the opponent and
        0 for a draw or when the depth limit is reached.
        Args: boardState (dict): The board state. It is modified in place during
            the search and restored before returning; the win/draw checks read
            the board behind ``gameLogic``, so this is assumed to be that
            board's own state dictionary.
        Args: depth (int): The depth of the tree.
        Args: isMaximizing (bool): Whether the player is maximizing or not.
        Args: letter (str): The letter of the computer player.
        Args: alpha (int): The alpha value (best score the maximizer can already force).
        Args: beta (int): The beta value (best score the minimizer can already force).
        Args: maxDepth (int): The maximum depth of the tree.
        Args: gameLogic (GameLogic): Rules object used for the win/draw checks.
            Defaults to one built on ``self.boardGame``.
        Returns: int: The best score for the computer player."""
        if gameLogic is None:
            gameLogic = TicTacToeGameLogic(self.boardGame)
        opponentLetter = 'O' if letter == 'X' else 'X'
        if gameLogic.chkMarkForWin(letter):
            return 1
        if gameLogic.chkMarkForWin(opponentLetter):
            return -1
        if gameLogic.chkForDraw() or depth >= maxDepth:
            return 0

        if isMaximizing:
            bestScore = -1000
            for key in boardState:
                if boardState[key] != ' ':
                    continue
                boardState[key] = letter
                # maxDepth and gameLogic are forwarded so that a caller-supplied
                # limit applies to the whole tree, not only to the first level.
                score = self.minimax(boardState, depth + 1, False, letter, alpha, beta, maxDepth, gameLogic)
                boardState[key] = ' '
                bestScore = max(score, bestScore)
                alpha = max(alpha, score)
                if beta <= alpha:
                    break  # the minimizer already has a better option elsewhere
        else:
            bestScore = 1000
            for key in boardState:
                if boardState[key] != ' ':
                    continue
                boardState[key] = opponentLetter
                score = self.minimax(boardState, depth + 1, True, letter, alpha, beta, maxDepth, gameLogic)
                boardState[key] = ' '
                bestScore = min(score, bestScore)
                beta = min(beta, score)
                if beta <= alpha:
                    break  # the maximizer already has a better option elsewhere
        return bestScore

---

## Reinforcement Learning 

---


### Value Iteration
- These classes solve the tic-tac-toe game using the value iteration algorithm.
- We assume the tic-tac-toe game is an MDP with states, actions, transition probabilities, and rewards.
- States: the board state.
- Actions: the position to place the next move.
- Transition probabilities: 1.0 if the move is valid, 0.0 otherwise.
- Rewards: 1 if the move is a winning move, 0 otherwise.

In [None]:
class Node:
    """A single state of the graph together with its value estimate."""

    def __init__(self, state):
        """Create a node for a state.
        Args: state: the (hashable) state this node represents
        """
        self.state = state
        self.value = 0  # Initialize value function for this state
        self.successors = {}  # successor state -> Node, filled by Graph.add_edge


class Graph:
    """Directed graph of states, stored as ``Node`` objects keyed by state."""

    def __init__(self):
        """Create an empty graph."""
        self.nodes = {}

    def add_node(self, state):
        """Return the node for ``state``, creating it if it does not exist yet.
        An existing node is kept as it is, so its value and recorded edges are
        not lost when the same state is added again.
        Args: state: the (hashable) state to add
        Returns: Node: the node stored for ``state``
        """
        node = self.nodes.get(state)
        if node is None:
            node = Node(state)
            self.nodes[state] = node
        return node

    def add_edge(self, state1, state2):
        """Record a directed edge from ``state1`` to ``state2``.
        Missing nodes are created on the fly.
        Args: state1: the source state
        Args: state2: the successor state
        """
        node1 = self.add_node(state1)
        node2 = self.add_node(state2)
        node1.successors[state2] = node2



In [None]:
import random

class ValueIteration:
    """Value iteration over the board's grid states.

    States are square grids (tuples of row tuples) holding 1, -1 or 0. Successor
    states are produced by ``board_game.get_new_state(state, action)`` and every
    transition is deterministic. A state that already contains a completed line,
    or has no empty cell, is terminal and has value 0.
    """

    def __init__(self, board_game):
        """Create a value-iteration solver.
        Args: board_game: object providing ``getBoardDimensions``,
            ``get_all_states``, ``get_actions`` and ``get_new_state``.
        """
        self.board_game = board_game
        self.value_function = {}
        self.policy = {}
        self.gamma = 0.9  # discount factor; overwritten by value_iteration()

    @staticmethod
    def _has_line(state, mark):
        """Tell whether some row, column or diagonal of ``state`` is filled with ``mark``."""
        n = len(state)
        if n == 0:
            return False
        lines = [list(row) for row in state]
        lines.extend([state[r][c] for r in range(n)] for c in range(n))
        lines.append([state[k][k] for k in range(n)])
        lines.append([state[k][n - 1 - k] for k in range(n)])
        return any(all(cell == mark for cell in line) for line in lines)

    def _legal_actions(self, state):
        """Return the actions available in ``state``; none once a line is complete."""
        if self._has_line(state, 1) or self._has_line(state, -1):
            return []
        return list(self.board_game.get_actions(state))

    def initialize(self, graph):
        """ Initialize the value function and policy
            Args: graph (Graph): The graph object.
            Create all the states and edges in the graph.
        """
        # create all the states
        for state in self.board_game.get_all_states(self.board_game.getBoardDimensions()):
            graph.add_node(state)
        # create all the edges; a worklist is used because successors that are
        # not in the graph yet must be added, which is not allowed while
        # iterating over graph.nodes itself
        pending = list(graph.nodes)
        while pending:
            state = pending.pop()
            for action in self._legal_actions(state):
                new_state = self.board_game.get_new_state(state, action)
                if new_state not in graph.nodes:
                    graph.add_node(new_state)
                    pending.append(new_state)
                graph.add_edge(state, new_state)
        # initialize value function and policy
        for state in graph.nodes:
            self.value_function[state] = 0  # Initialize to 0
            self.policy[state] = None  # Initialize to None

    def value_iteration(self, graph, gamma=0.9, theta=1e-4):
        """ Input: graph (Graph): The graph object.
                   gamma (float): The discount factor.
                   theta (float): The convergence threshold.
            Output: value_function (dict): The value function.
                    policy (dict): The policy.
            Sweep over all states and update the value function until the largest
            change in a sweep is below theta, then extract the greedy policy.
        """
        self.gamma = gamma
        while True:
            delta = 0
            for state in graph.nodes:
                old_value = self.value_function.get(state, 0)
                new_value = self.get_value(state)
                self.value_function[state] = new_value
                delta = max(delta, abs(old_value - new_value))
            if delta < theta:
                break
        self.extract_policy(graph)
        return self.value_function, self.policy

    def extract_policy(self, graph):
        """ Input: graph (Graph): The graph object.
            Output: policy (dict): The policy.
            For every state pick the action with the highest Bellman value;
            states without available actions get None.
        """
        for state in graph.nodes:
            actions = self._legal_actions(state)
            if actions:
                self.policy[state] = max(actions, key=lambda action, s=state: self.get_value(s, action))
            else:
                self.policy[state] = None
        return self.policy

    def get_value(self, state, action=None):
        """ Input: state (tuple): The state.
                   action (tuple): The action.
            Output: value (float): The value of taking ``action`` in ``state``, or,
                    when no action is given, the maximum over all available
                    actions (0 if there are none).
        """
        if action is not None:
            return self.bellman_equation(state, action)
        actions = self._legal_actions(state)
        if not actions:
            return 0
        return max(self.bellman_equation(state, a) for a in actions)

    def bellman_equation(self, state, action):
        """ Input: state (tuple): The state.
                   action (tuple): The action.
            Output: value (float): The value.
            Uses the Bellman equation to calculate the value of a state after taking a given action:
            reward + gamma * sum(probability * value of the successor state).
        """
        reward = self.get_reward(state, action)
        gamma = self.get_discount_factor(state)
        transition_probabilities = self.get_transition_probabilities(state, action)
        # Successors that were never initialised count as value 0.
        expected_value = sum(
            probability * self.value_function.get(new_state, 0)
            for new_state, probability in transition_probabilities.items()
        )
        return reward + gamma * expected_value

    def get_discount_factor(self, state):
        """ Input: state (tuple): The state (unused; the factor is the same everywhere).
            Output: gamma (float): The discount factor set by the last value_iteration() call.
        """
        return self.gamma

    def get_reward(self, state, action):
        """ Input: state (tuple): The state.
                   action (tuple): The action.
            Output: reward (float): The reward.
            1 if the state reached by the action contains a line filled with 1, 0 otherwise.
        """
        new_state = self.board_game.get_new_state(state, action)
        return 1 if self._has_line(new_state, 1) else 0

    def get_transition_probabilities(self, state, action):
        """ Input: state (tuple): The state.
                   action (tuple): The action.
            Output: transition_probabilities (dict): The transition probabilities.
            Moves are deterministic, so the single successor state gets probability 1.0.
        """
        return {self.board_game.get_new_state(state, action): 1.0}



---

### Algorithm for the human player

In [None]:
class UserInput(Algorithm):
    """Algorithm that obtains the move from the keyboard instead of computing it."""

    def __init__(self, boardGame):
        """Create a user-input algorithm object for the board game
        Args: boardGame (Board): the board game. Must be a subclass of Board
        """
        super().__init__(boardGame)

    def bestMove(self, boardGame, letter):
        """Ask the user for input until a usable position is entered.
        A position is accepted when it is an integer from 1 to the number of
        cells on the board and the board reports that cell as free.
        Args: boardGame (Board): The board game object.
        Args: letter (str): The letter of the player (not used here).
        Returns: int: The position of the user's input."""
        prompt = "Please enter a position: "
        while True:
            try:
                choice = int(input(prompt))
                if 1 <= choice <= boardGame.getBoardDimensions()**2 and boardGame.spaceIsFree(choice):
                    return choice
                # Out-of-range and occupied cells take the same path as
                # non-numeric input.
                raise ValueError
            except ValueError:
                print("Invalid input!")

---

## Game

In [None]:
import time

def selectAlgorithm(boardGame):
    """Ask the user to choose an algorithm.
    The prompt is repeated until the user picks one of the implemented
    algorithms; non-numeric input, numbers outside the menu and the
    not-yet-implemented option 3 are all rejected with a message.
    Args: boardGame (Board): The board game object.
    Returns: Algorithm: The algorithm object."""
    print("1. Minimax")
    print("2. Minimax with Alpha Beta Pruning")
    print("3. Reinforcement Learning - NOT IMPLEMENTED")
    implemented = {1: Minimax, 2: MinimaxAlphaBeta}
    while True:
        try:
            algorithm_choice = int(input("Please choose an algorithm: "))
        except ValueError:
            print("Algorithm choice must be a number!")
            continue
        if algorithm_choice in implemented:
            return implemented[algorithm_choice](boardGame=boardGame)
        if algorithm_choice == 3:
            # No reinforcement-learning Algorithm subclass exists to instantiate,
            # so ask again instead of failing.
            print("Reinforcement Learning is not implemented yet!")
        else:
            print("Algorithm choice must be between 1 and 3!")

def selectPlayer(letter, isComputer, boardGame):
    """Build the player object for one side of the game.
    A computer player gets the algorithm chosen through selectAlgorithm();
    a human player gets a UserInput algorithm.
    Args: letter (str): The letter of the player.
    Args: isComputer (bool): Whether the player is a computer or not.
    Args: boardGame (Board): The board game object.
    Returns: Player: The player object."""
    if not isComputer:
        return HumanPlayer(letter, UserInput(boardGame))
    return ComputerPlayer(letter, selectAlgorithm(boardGame))

def _readBoardDimensions():
    """Prompt until the user enters a whole number between 3 and 5 and return it."""
    while True:
        try:
            boardDimensions = int(input("Please enter the board dimensions: "))
        except ValueError:
            # Non-numeric input is treated like an out-of-range number.
            print("Board dimensions must be between 3 and 5!")
            continue
        if 3 <= boardDimensions <= 5:
            return boardDimensions
        print("Board dimensions must be between 3 and 5!")


def gameLoop():
    """The main game loop.
    Asks for the board dimensions, creates the board and its game logic, then
    creates player one ('X', human) and player two ('O', computer) through
    selectPlayer().
    Starts a timer to time the game.
    The players then alternate, starting with player one. After every move the
    board is printed; if the game is won the winner and the time taken are
    printed, if it is drawn a draw message and the time taken are printed, and
    the loop ends."""
    boardGame = TicTacToeBoard(_readBoardDimensions())
    gameLogic = TicTacToeGameLogic(boardGame)
    print("Which Algorithm should player one use?")
    playerOne = selectPlayer('X', False, boardGame)
    print("Which Algorithm should Player two use?")
    playerTwo = selectPlayer('O', True, boardGame)
    boardGame.printBoard()
    start = time.time()
    players = (playerOne, playerTwo)
    turn = 0
    while not gameLogic.chkForWin() and not gameLogic.chkForDraw():
        current = players[turn % 2]
        current.makeMove(boardGame)
        boardGame.printBoard()
        # A win is checked before a draw: the last free cell can complete a line.
        if gameLogic.chkForWin():
            print("Player", current.letter, "wins!")
        elif gameLogic.chkForDraw():
            print("It's a draw!")
        else:
            turn += 1
            continue
        end = time.time()
        print("Time taken: ", end - start)
        break

# Start the interactive game only when this file is executed directly (or run
# as a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    gameLoop()