Commit: [0edba49611125b58f5460ac362df2f5dc2e02e44 (message: "Revert to 6th of january")](https://github.com/squillero/computational-intelligence/tree/0edba49611125b58f5460ac362df2f5dc2e02e44)


In [None]:
from tqdm.auto import tqdm
from abc import ABC, abstractmethod
from copy import deepcopy
from enum import Enum

from collections import defaultdict

import numpy as np
import random
import time
import matplotlib.pyplot as plt
import os
import pickle

In [None]:
# Number of self-play episodes run by QPlayer.Qlearning() when training the agent.
EPISODES_TRAINING = 500_000
# Number of matches played in each evaluation cell below.
EPISODES_GAME = 100
# Side length of the board as assumed by MyGame and by the reward scaling in QPlayer.
# NOTE(review): Game itself hard-codes a 5x5 board, so this value must stay 5.
BOARD_SIZE = 5

## Game class definition

This section contains the _Move_ enum, the abstract _Player_ class that serves as the interface for our players, and the **_Game_** class implemented by Andrea Calabrese.


In [None]:
class Move(Enum):
    """
    Side of the board toward which the taken piece is slid.

    The taken piece is re-inserted at that end of its row or column, and the
    pieces lying in between are shifted by one cell to fill the gap.
    """

    TOP = 0  # the piece ends up in the first row of its column
    BOTTOM = 1  # the piece ends up in the last row of its column
    LEFT = 2  # the piece ends up in the first column of its row
    RIGHT = 3  # the piece ends up in the last column of its row


class Player(ABC):
    """Abstract interface that every Quixo player must implement."""

    def __init__(self) -> None:
        """Initialise the player; subclasses may override this to keep state or memory."""
        pass

    @abstractmethod
    def make_move(self, game: "Game") -> tuple[tuple[int, int], Move]:
        """
        Choose the next move for the current position.

        The game accepts coordinates of the type (X, Y). X goes from left to right, while Y goes from top to bottom, as in 2D graphics.
        Thus, the coordinates that this method returns shall be in the (X, Y) format.

        game: the Quixo game. You can use it to override the current game with yours, but everything is evaluated by the main game
        return values: this method shall return a tuple of X,Y positions and a move among TOP, BOTTOM, LEFT and RIGHT
        """
        pass


class Game(object):
    """
    Quixo game engine: owns the 5x5 board and applies the players' moves.

    Board cells hold -1 (neutral piece), 0 (piece of player 0) or 1 (piece of
    player 1). Internally the board is indexed as ``[row, column]``, while the
    coordinates exchanged with the players are ``(X, Y) = (column, row)``.
    """

    def __init__(self) -> None:
        # A signed dtype is needed to store the -1 "neutral" marker. The former
        # ``np.ones(..., dtype=np.uint8) * -1`` only worked through NumPy 1.x
        # value-based promotion and raises OverflowError on NumPy >= 2.
        self._board = np.full((5, 5), -1, dtype=np.int8)
        # play() advances the index before the first move, so starting from 1
        # makes players[0] move first.
        self.current_player_idx = 1

    def get_board(self) -> np.ndarray:
        """
        Returns a copy of the board, so callers cannot alter the game state
        """
        return self._board.copy()

    def get_current_player(self) -> int:
        """
        Returns the index of the current player
        """
        return self.current_player_idx

    def print(self):
        """Prints the board. -1 are neutral pieces, 0 are pieces of player 0, 1 pieces of player 1"""
        print(self._board)

    def check_winner(self) -> int:
        """
        Check the winner.

        Rows are inspected first, then columns, then the two diagonals; the
        owner of the first complete line found is returned as a plain ``int``.
        Returns -1 when no line is complete.
        """
        board = self._board
        # for each row: a player has completed it if every cell equals the first one
        for x in range(board.shape[0]):
            if board[x, 0] != -1 and np.all(board[x, :] == board[x, 0]):
                return int(board[x, 0])
        # for each column
        for y in range(board.shape[1]):
            if board[0, y] != -1 and np.all(board[:, y] == board[0, y]):
                return int(board[0, y])
        # principal diagonal: cells [k, k]
        diagonal = board.diagonal()
        if diagonal[0] != -1 and np.all(diagonal == diagonal[0]):
            return int(diagonal[0])
        # secondary diagonal: cells [k, -(k + 1)]
        anti_diagonal = np.fliplr(board).diagonal()
        if anti_diagonal[0] != -1 and np.all(anti_diagonal == anti_diagonal[0]):
            return int(anti_diagonal[0])
        return -1

    def play(self, player1: "Player", player2: "Player") -> int:
        """
        Play the game until somebody wins. Returns the index of the winning player.

        A player whose move is rejected is asked again until it proposes a
        legal one.
        """
        players = [player1, player2]
        winner = -1
        while winner < 0:
            self.current_player_idx += 1
            self.current_player_idx %= len(players)
            ok = False
            while not ok:
                from_pos, slide = players[self.current_player_idx].make_move(self)
                ok = self.__move(from_pos, slide, self.current_player_idx)

            winner = self.check_winner()
        return winner

    def __move(self, from_pos: tuple[int, int], slide: "Move", player_id: int) -> bool:
        """
        Perform a move.

        from_pos: (X, Y) coordinates of the piece to take
        slide: direction in which the taken piece is slid
        player_id: index of the moving player, 0 or 1
        return value: True if the move was legal and has been applied, False
        otherwise (the board is then left untouched)
        """
        # only the two real players may move (the old ``> 2`` test let id 2 through)
        if player_id not in (0, 1):
            return False
        # players speak (X, Y); the NumPy board is indexed [row, column]
        position = (from_pos[1], from_pos[0])
        rows, cols = self._board.shape
        # reject out-of-board coordinates instead of raising IndexError or
        # silently wrapping around through negative indices
        if not (0 <= position[0] < rows and 0 <= position[1] < cols):
            return False
        # indexing with a full coordinate yields an independent scalar
        prev_value = self._board[position]
        acceptable = self.__take(position, player_id)
        if acceptable:
            acceptable = self.__slide(position, slide)
            if not acceptable:
                # undo the take when the slide is illegal
                self._board[position] = prev_value
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        """
        Take piece.

        from_pos is (row, column). The piece can be taken only if it lies on
        the border of the board and is neutral or already owned by player_id;
        on success the cell is marked as belonging to player_id.
        """
        row, col = from_pos
        last_row = self._board.shape[0] - 1
        last_col = self._board.shape[1] - 1
        inside = 0 <= row <= last_row and 0 <= col <= last_col
        on_border = inside and (row in (0, last_row) or col in (0, last_col))
        # the ownership test is evaluated only for in-board coordinates
        acceptable = bool(
            on_border
            and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        )
        if acceptable:
            self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: "Move") -> bool:
        """
        Slide the other pieces.

        from_pos is (row, column). A piece cannot be pushed toward the side it
        already lies on. Returns True if the slide was legal and applied.
        """
        # define the corners
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        # if the piece position is not in a corner
        if from_pos not in SIDES:
            # if it is at the TOP, it can be moved down, left or right
            acceptable_top: bool = from_pos[0] == 0 and (
                slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT
            )
            # if it is at the BOTTOM, it can be moved up, left or right
            acceptable_bottom: bool = from_pos[0] == 4 and (
                slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT
            )
            # if it is on the LEFT, it can be moved up, down or right
            acceptable_left: bool = from_pos[1] == 0 and (
                slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT
            )
            # if it is on the RIGHT, it can be moved up, down or left
            acceptable_right: bool = from_pos[1] == 4 and (
                slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT
            )
        # if the piece position is in a corner
        else:
            # if it is in the upper left corner, it can be moved to the right and down
            acceptable_top: bool = from_pos == (0, 0) and (
                slide == Move.BOTTOM or slide == Move.RIGHT
            )
            # if it is in the lower left corner, it can be moved to the right and up
            acceptable_left: bool = from_pos == (4, 0) and (
                slide == Move.TOP or slide == Move.RIGHT
            )
            # if it is in the upper right corner, it can be moved to the left and down
            acceptable_right: bool = from_pos == (0, 4) and (
                slide == Move.BOTTOM or slide == Move.LEFT
            )
            # if it is in the lower right corner, it can be moved to the left and up
            acceptable_bottom: bool = from_pos == (4, 4) and (
                slide == Move.TOP or slide == Move.LEFT
            )
        # check if the move is acceptable
        acceptable: bool = (
            acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        )
        # if it is
        if acceptable:
            # take the piece
            piece = self._board[from_pos]
            # if the player wants to slide it to the left
            if slide == Move.LEFT:
                # for each column starting from the column of the piece and moving to the left
                for i in range(from_pos[1], 0, -1):
                    # copy the value contained in the same row and the previous column
                    self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                # move the piece to the left
                self._board[(from_pos[0], 0)] = piece
            # if the player wants to slide it to the right
            elif slide == Move.RIGHT:
                # for each column starting from the column of the piece and moving to the right
                for i in range(from_pos[1], self._board.shape[1] - 1, 1):
                    # copy the value contained in the same row and the following column
                    self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                # move the piece to the right
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            # if the player wants to slide it upward
            elif slide == Move.TOP:
                # for each row starting from the row of the piece and going upward
                for i in range(from_pos[0], 0, -1):
                    # copy the value contained in the same column and the previous row
                    self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                # move the piece up
                self._board[(0, from_pos[1])] = piece
            # if the player wants to slide it downward
            elif slide == Move.BOTTOM:
                # for each row starting from the row of the piece and going downward
                for i in range(from_pos[0], self._board.shape[0] - 1, 1):
                    # copy the value contained in the same column and the following row
                    self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                # move the piece down
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable

## MyGame definition

The **_MyGame_** class is a subclass of _Game_: it inherits its methods and attributes, and it also adds several new helper methods.


In [None]:
class MyGame(Game):
    """
    This class is used throughout the project instead of the Game class, as discussed with the professor, to adapt it to our needs.
    In this class we:
        - Override the __hash__ and __eq__ methods to make the Game object hashable, and therefore usable as key in a dictionary.
          Two games are considered equal when their boards are identical or identical after swapping the two players' pieces;
        - Implement the is_valid method, which is used to check if a move is valid without modifying the current game, by applying the move function on a copy of the game;
        - Expose the move method to use it in a cleaner way throughout the project.
    """

    def __init__(self) -> None:
        super().__init__()

    def __str__(self) -> str:
        """Return the (row, column) positions of the X (player 1) and O (player 0) pieces."""
        board = self._board
        rows, cols = board.shape
        cells = [(i, j) for i in range(rows) for j in range(cols)]
        x_part = "".join(f"({i},{j})," for i, j in cells if board[i][j] == 1)
        o_part = "".join(f"({i},{j})," for i, j in cells if board[i][j] == 0)
        return "X: " + x_part + " O: " + o_part

    def reversed_players_board(self):
        """Return a copy of the board where the pieces of the two players are swapped."""
        reversed_players_board = np.where(
            self._board == 0, 1, np.where(self._board == 1, 0, self._board)
        )  # 1 became 0 and 0 became 1

        return reversed_players_board

    def __hash__(self):
        """
        Hash the board so that a position and its player-swapped twin collide.

        Taking the minimum over the two equivalent boards makes the hash
        independent of which of the two is stored, in agreement with __eq__.
        """
        candidates = (self.reversed_players_board(), self._board)
        return min(hash(tuple(map(tuple, board))) for board in candidates)

    # Without an appropriate _eq_ method, two objects that are considered to be the same from the point of view of the application domain could have different hash values
    def __eq__(self, other):
        """
        Compare two games up to a swap of the players' pieces.

        Returns NotImplemented for objects without a board, so that comparing
        a game with an unrelated object yields False instead of raising
        AttributeError.
        """
        other_board = getattr(other, "_board", None)
        if other_board is None:
            return NotImplemented
        return bool(
            np.array_equal(self.reversed_players_board(), other_board)
            or np.array_equal(self._board, other_board)
        )

    def print(self):
        """
        IT OVERRIDES GAME'S print() METHOD

        Prints the board. '-' are neutral pieces, 'O' are pieces of player 0, 'X' pieces of player 1
        """
        symbols = {-1: "-", 0: "O"}
        # iterate over the actual board rather than trusting the BOARD_SIZE global
        for row in self._board:
            print([symbols.get(int(cell), "X") for cell in row])

    def is_valid(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        """Tell whether the move is legal by trying it on a copy; the current game is not modified."""
        cp = deepcopy(self)
        return cp._Game__move(from_pos, slide, player_id)

    def move(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        """
        Just to call __move() method from Game class; the method is private, but in MyGame we use it as public
        """

        return self._Game__move(from_pos, slide, player_id)

    def next_player(self):
        """Hand the turn over to the other player."""
        self.current_player_idx += 1
        self.current_player_idx %= 2

    def longest_sequence(self, player_id: int) -> int:
        """
        Return the length of the longest run of consecutive pieces of player_id.

        Runs are searched along rows, columns, diagonals and anti-diagonals;
        0 is returned when the player has no piece on the board.
        """
        board = self._board
        rows, cols = board.shape
        # (row step, column step): right, down, down-right, down-left.
        # Scanning every cell forward in these directions covers every run.
        directions = ((0, 1), (1, 0), (1, 1), (1, -1))
        max_sequence = 0
        for i in range(rows):
            for j in range(cols):
                if board[i][j] != player_id:
                    continue
                for d_row, d_col in directions:
                    sequence = 0
                    r, c = i, j
                    while 0 <= r < rows and 0 <= c < cols and board[r][c] == player_id:
                        sequence += 1
                        r += d_row
                        c += d_col
                    max_sequence = max(max_sequence, sequence)
        return max_sequence

    def get_possible_moves(self) -> list[tuple[tuple[int, int], Move]]:
        """
        List every legal ((X, Y), slide) move for the current player.

        Moves are enumerated by X, then Y, then in the order TOP, BOTTOM,
        LEFT, RIGHT; callers that break ties by position rely on this order.
        """
        rows, cols = self._board.shape
        possible_moves = []
        for x in range(cols):
            for y in range(rows):
                if 0 < x < cols - 1 and 0 < y < rows - 1:
                    continue  # internal cubes can never be taken
                for move in [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]:
                    if self.is_valid((x, y), move, self.current_player_idx):
                        possible_moves.append(((x, y), move))
        return possible_moves

## Player definition


In [None]:
class RandomPlayer(Player):
    """Player that proposes a uniformly random cell and slide direction."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: "MyGame") -> tuple[tuple[int, int], Move]:
        """
        Draw a random (X, Y) cell of the 5x5 board and a random direction.

        The proposal is not checked for legality; the board passed in is not
        inspected at all.
        """
        x = random.randint(0, 4)
        y = random.randint(0, 4)
        # list(Move) yields TOP, BOTTOM, LEFT, RIGHT in definition order
        direction = random.choice(list(Move))
        return (x, y), direction


# Module-level statistics updated by QPlayer.q_best_move():
# total_move_count counts the calls to q_best_move().
total_move_count = 0
# hit_count is increased when q_best_move() finds a non-zero Q-value for the state.
hit_count = 0


class QPlayer(Player):
    """
    Tabular Q-learning agent for Quixo.

    Q-values are stored in ``Q_table``, keyed by ``(state, action)`` where
    ``state`` is a MyGame and ``action`` is an ``((X, Y), Move)`` tuple.
    Missing entries are treated as 0.
    """

    def __init__(
        self, learning_rate=0.1, discount_factor=0.95, exploration_prob=0.1
    ) -> None:
        """
        learning_rate: weight given to the new estimate in each Q update
        discount_factor: weight of the best next-state value in the update target
        exploration_prob: probability of picking a random move during training
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.Q_table = defaultdict(float)

    def reward(self, state, next_state):
        """
        Reward for going from ``state`` to ``next_state``, from the point of
        view of the player to move in ``state``.

        Returns 1 for a win and -1 for a loss. Otherwise the reward is based on
        the change of the longest sequences, scaled by BOARD_SIZE: first the
        player's own sequence (positive if it grew, negative if it shrank),
        then the opponent's one (positive if it shrank, negative if it grew);
        0 when nothing changed.
        """
        # both players are taken from ``state`` so the two boards are always
        # evaluated for the same pair of players
        player = state.current_player_idx
        opponent = player ^ 1

        winner = next_state.check_winner()
        if winner == player:
            return 1
        if winner == opponent:
            return -1

        previous_longest_sequence = state.longest_sequence(player)
        current_longest_sequence = next_state.longest_sequence(player)
        if current_longest_sequence > previous_longest_sequence:
            return current_longest_sequence / BOARD_SIZE
        if current_longest_sequence < previous_longest_sequence:
            return -previous_longest_sequence / BOARD_SIZE

        opponent_previous_longest_sequence = state.longest_sequence(opponent)
        opponent_current_longest_sequence = next_state.longest_sequence(opponent)
        if opponent_current_longest_sequence < opponent_previous_longest_sequence:
            return opponent_previous_longest_sequence / BOARD_SIZE
        if opponent_current_longest_sequence > opponent_previous_longest_sequence:
            return -opponent_current_longest_sequence / BOARD_SIZE

        return 0

    def Qlearning(self):
        """
        Train the Q-table for EPISODES_TRAINING episodes against a random opponent.

        In every step the agent moves (epsilon-greedy), then the opponent,
        treated as part of the environment, answers with a random legal move,
        and the Q-value of the agent's move is updated with the resulting
        reward. The opponent does not answer when the agent's move has already
        ended the game, and terminal states contribute no future value.
        """
        for _ in tqdm(range(EPISODES_TRAINING)):
            state_t0 = MyGame()

            if random.random() < 0.5:  # randomize who starts
                action = random.choice(state_t0.get_possible_moves())
                state_t0.move(action[0], action[1], state_t0.current_player_idx)
                state_t0.next_player()

            while state_t0.check_winner() < 0:
                # State t0 is our agent's turn, so it moves
                # state_t0 is s
                # action_t0 is a
                action_t0 = self.choose_action(state_t0)
                state_t1 = deepcopy(state_t0)
                state_t1.move(action_t0[0], action_t0[1], state_t0.current_player_idx)
                state_t1.next_player()

                # State t1 is the opponent's turn, so it moves
                # As this is a two player game, we simulate the opponent as part of the
                # environment. If the agent's move already ended the game, the
                # opponent must not move: it could break the winning line and
                # hide the outcome from the reward.
                state_t2 = state_t1
                if state_t2.check_winner() < 0:
                    action_t1 = random.choice(state_t2.get_possible_moves())
                    state_t2.move(
                        action_t1[0], action_t1[1], state_t2.current_player_idx
                    )
                state_t2.next_player()

                # State t2 is our agent's next turn (s')
                reward = self.reward(state_t0, state_t2)

                if state_t2.check_winner() < 0:
                    best_action_value = max(
                        (
                            self.Q_table.get((state_t2, action), 0)
                            for action in state_t2.get_possible_moves()
                        ),
                        default=0,
                    )
                else:
                    # no move follows a finished game, so there is nothing to bootstrap from
                    best_action_value = 0

                key = (state_t0, action_t0)
                self.Q_table[key] = (1 - self.learning_rate) * self.Q_table.get(
                    key, 0
                ) + self.learning_rate * (
                    reward + self.discount_factor * best_action_value
                )

                state_t0 = state_t2

    def _greedy_action(self, state: "MyGame", possible):
        """Return (best action, its Q-value) among ``possible``; the first action wins ties. (None, -inf) if empty."""
        best_q = float("-inf")
        best_move = None
        for action in possible:
            move_q = self.Q_table.get((state, action), 0)
            if move_q > best_q:
                best_q = move_q
                best_move = action
        return best_move, best_q

    def choose_action(self, state: "MyGame"):
        """Epsilon-greedy choice: a random legal move with probability exploration_prob, else the best known one."""
        possible = state.get_possible_moves()
        if random.random() < self.exploration_prob:
            return random.choice(possible)
        return self._greedy_action(state, possible)[0]

    def q_best_move(self, state: "MyGame"):
        """
        Return the legal move with the highest Q-value (None if there is no legal move).

        Also updates the module-level statistics: total_move_count is increased
        on every call, hit_count at most once per call, when the selected move
        has a non-zero Q-value.
        """
        global total_move_count
        global hit_count
        total_move_count += 1

        best_move, best_q = self._greedy_action(state, state.get_possible_moves())
        if best_move is not None and best_q != 0:
            hit_count += 1
        return best_move

    def make_move(self, game: "MyGame") -> tuple[tuple[int, int], Move]:
        """Play the best known move for the current position, as ((X, Y), slide)."""
        best_move = self.q_best_move(game)
        if best_move is None:
            raise ValueError("no legal move available for the current player")
        from_pos, move = best_move
        return from_pos, move

### Training

Here we train the Q-learning player (or load a previously saved Q-table).


In [None]:
player1 = QPlayer()
train = False  # Set to True to force a new training run; with False a saved q_table.pkl is reused when it exists

# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load a q_table.pkl that was produced by this notebook.
if os.path.exists("q_table.pkl") and not train:
    with open("q_table.pkl", "rb") as f:
        print("Loading q_table.pkl")
        player1.Q_table = pickle.load(f)
else:
    # no saved table available, or retraining requested: learn from scratch
    player1.Qlearning()

## Draw_pie_chart

In [None]:
def draw_pie_chart(win_rate, loss_rate, draw_rate, title):
    """
    Show a pie chart with the share of wins, losses and draws.

    win_rate, loss_rate, draw_rate: sizes of the three slices
    title: title displayed above the chart
    """
    # label -> (slice size, slice colour), in drawing order
    slices = {
        "Wins": (win_rate, "#29F05F"),
        "Losses": (loss_rate, "#EC3954"),
        "Draws": (draw_rate, "#4570F8"),
    }
    sizes = [size for size, _ in slices.values()]
    palette = [colour for _, colour in slices.values()]

    _, axes = plt.subplots()
    axes.pie(sizes, labels=list(slices), colors=palette, autopct="%1.1f%%")
    axes.set_title(title)

    plt.show()

# Gameplay


### Testing

Here we test the performance of our agent over EPISODES_GAME matches.


**idxAgent** depends on the position of our agent in the call to the Game.play() method:

- idxAgent = 0 if Game.play(Agent, opponent)
- idxAgent = 1 if Game.play(opponent, Agent)

Q-Learning player as **first player** -> idxAgent=0

In [None]:
# Evaluation with the agent passed as first argument of play(), i.e. as player index 0.
counter1 = 0  # matches won by the agent
counter2 = 0  # matches won by the opponent
idxAgent = 0
start_time = time.time()

for _ in tqdm(range(EPISODES_GAME)):
    g = MyGame()

    # play() advances current_player_idx before the first move, so calling
    # next_player() here switches which of the two players moves first
    if random.random() < 0.5:  # randomize who starts
        g.next_player()

    player2 = RandomPlayer()
    winner = g.play(player1, player2)

    if winner == idxAgent:  # agent
        counter1 += 1
    if winner == 1-idxAgent:  # opponent
        counter2 += 1

    # NOTE(review): the counters updated by QPlayer.q_best_move are reset after
    # every match and never read here — confirm whether they should be reported first
    hit_count = 0
    total_move_count = 0

end_time = time.time()
elapsed_time = end_time - start_time

print("Win: ", counter1, "/", EPISODES_GAME)
print("Losses ", counter2, "/", EPISODES_GAME)
# NOTE(review): Game.play() as defined in this notebook loops until a winner exists,
# so this difference is expected to be 0
print("Ties: ", EPISODES_GAME - counter1 - counter2, "/", EPISODES_GAME)
print("Number of entries", len(player1.Q_table))
print("Time for: ", EPISODES_GAME, "--> ", elapsed_time)
draw_pie_chart(
    counter1,
    counter2,
    EPISODES_GAME - counter1 - counter2,
    f"Results with {EPISODES_GAME} games",
)  # to review if we have to pass the rates or just the number of winning/losing/draw matches

Q-Learning player as **second player** -> idxAgent=1

In [None]:
# Evaluation with the agent passed as second argument of play(), i.e. as player index 1.
counter1 = 0  # matches won by the agent
counter2 = 0  # matches won by the opponent
idxAgent = 1
start_time = time.time()

for _ in tqdm(range(EPISODES_GAME)):
    g = MyGame()

    # play() advances current_player_idx before the first move, so calling
    # next_player() here switches which of the two players moves first
    if random.random() < 0.5:  # randomize who starts
        g.next_player()

    player2 = RandomPlayer()
    winner = g.play(player2, player1)

    if winner == idxAgent:  # agent
        counter1 += 1
    if winner == 1-idxAgent:  # opponent
        counter2 += 1

    # NOTE(review): the counters updated by QPlayer.q_best_move are reset after
    # every match and never read here — confirm whether they should be reported first
    hit_count = 0
    total_move_count = 0

end_time = time.time()
elapsed_time = end_time - start_time

print("Win: ", counter1, "/", EPISODES_GAME)
print("Losses ", counter2, "/", EPISODES_GAME)
# NOTE(review): Game.play() as defined in this notebook loops until a winner exists,
# so this difference is expected to be 0
print("Ties: ", EPISODES_GAME - counter1 - counter2, "/", EPISODES_GAME)
print("Number of entries", len(player1.Q_table))
print("Time for: ", EPISODES_GAME, "--> ", elapsed_time)
draw_pie_chart(
    counter1,
    counter2,
    EPISODES_GAME - counter1 - counter2,
    f"Results with {EPISODES_GAME} games",
)  # to review if we have to pass the rates or just the number of winning/losing/draw matches

In [None]:
# Save the Q-table so that later runs can load it instead of retraining.
# The context manager closes the file even if pickling raises.
with open("q_table.pkl", "wb") as file:
    pickle.dump(player1.Q_table, file)