# Game.py


In [1]:
from abc import ABC, abstractmethod
from copy import deepcopy
from enum import Enum
import numpy as np
from termcolor import colored
import os

In [2]:
class Move(Enum):
    """
    Selects where you want to place the taken piece. The rest of the pieces are shifted
    """

    TOP = 0
    BOTTOM = 1
    LEFT = 2
    RIGHT = 3

In [3]:
class Player(ABC):
    def __init__(self) -> None:
        """You can change this for your player if you need to handle state/have memory"""
        pass

    @abstractmethod
    def make_move(self, game: "Game") -> tuple[tuple[int, int], Move]:
        """
        The game accepts coordinates of the type (X, Y). X goes from left to right, while Y goes from top to bottom, as in 2D graphics.
        Thus, the coordinates that this method returns shall be in the (X, Y) format.

        game: the Quixo game. You can use it to override the current game with yours, but everything is evaluated by the main game
        return values: this method shall return a tuple of X,Y positions and a move among TOP, BOTTOM, LEFT and RIGHT
        """
        pass

In [4]:
class Game(object):
    def __init__(self) -> None:
        self._board = np.ones((5, 5), dtype=np.uint8) * -1
        self.current_player_idx = 1

    def get_board(self) -> np.ndarray:
        """
        Returns the board
        """
        return deepcopy(self._board)

    def get_current_player(self) -> int:
        """
        Returns the current player
        """
        return deepcopy(self.current_player_idx)

    def print(self):
        """Prints the board. -1 are neutral pieces, X are pieces of player 0, O pieces of player 1"""
        BOARD_DIM = 5
        l_len = (
            BOARD_DIM * 6 + 1
        )  # This is just for printing the right number of '-'
        os.system("cls||clear")
        print("   ", "  (0)   (1)   (2)   (3)   (4)")
        for i in range(BOARD_DIM):
            print("   ", "-" * l_len)
            print(f"({i})", "| ", end="")
            print(
                " | ".join(
                    map(
                        lambda e: "   "
                        if e == -1
                        else colored(" X ", "red")
                        if e == 0
                        else colored(" O ", "green"),
                        self._board[i].astype(int),
                    )
                ),
                end=" ",
            )
            print("|")
        print("   ", "-" * l_len)

    def check_winner(self) -> int:
        """Check the winner. Returns the player ID of the winner if any, otherwise returns -1"""
        # for each row
        player = self.get_current_player()
        winner = -1
        for x in range(self._board.shape[0]):
            # if a player has completed an entire row
            if self._board[x, 0] != -1 and all(
                self._board[x, :] == self._board[x, 0]
            ):
                # return winner is this guy
                winner = self._board[x, 0]
        if winner > -1 and winner != self.get_current_player():
            return winner
        # for each column
        for y in range(self._board.shape[1]):
            # if a player has completed an entire column
            if self._board[0, y] != -1 and all(
                self._board[:, y] == self._board[0, y]
            ):
                # return the relative id
                winner = self._board[0, y]
        if winner > -1 and winner != self.get_current_player():
            return winner
        # if a player has completed the principal diagonal
        if self._board[0, 0] != -1 and all(
            [self._board[x, x] for x in range(self._board.shape[0])]
            == self._board[0, 0]
        ):
            # return the relative id
            winner = self._board[0, 0]
        if winner > -1 and winner != self.get_current_player():
            return winner
        # if a player has completed the secondary diagonal
        if self._board[0, -1] != -1 and all(
            [self._board[x, -(x + 1)] for x in range(self._board.shape[0])]
            == self._board[0, -1]
        ):
            # return the relative id
            winner = self._board[0, -1]
        return winner

    def play(self, player1: Player, player2: Player) -> int:
        """Play the game. Returns the winning player"""
        players = [player1, player2]
        winner = -1
        self.print()
        while winner < 0:
            self.current_player_idx += 1
            self.current_player_idx %= len(players)
            ok = False
            while not ok:
                from_pos, slide = players[self.current_player_idx].make_move(
                    self
                )
                ok = self.__move(from_pos, slide, self.current_player_idx)
            self.print()
            winner = self.check_winner()
        return winner

    def __move(
        self, from_pos: tuple[int, int], slide: Move, player_id: int
    ) -> bool:
        """Perform a move"""
        if player_id > 2:
            return False
        # Oh God, Numpy arrays
        prev_value = deepcopy(self._board[(from_pos[1], from_pos[0])])
        acceptable = self.__take((from_pos[1], from_pos[0]), player_id)
        if acceptable:
            acceptable = self.__slide((from_pos[1], from_pos[0]), slide)
            if not acceptable:
                self._board[(from_pos[1], from_pos[0])] = deepcopy(prev_value)
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        """Take piece"""
        # acceptable only if in border
        acceptable: bool = (
            # check if it is in the first row
            (from_pos[0] == 0 and from_pos[1] < 5)
            # check if it is in the last row
            or (from_pos[0] == 4 and from_pos[1] < 5)
            # check if it is in the first column
            or (from_pos[1] == 0 and from_pos[0] < 5)
            # check if it is in the last column
            or (from_pos[1] == 4 and from_pos[0] < 5)
            # and check if the piece can be moved by the current player
        ) and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable:
            self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: Move) -> bool:
        """Slide the other pieces"""
        # define the corners
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        # if the piece position is not in a corner
        if from_pos not in SIDES:
            # if it is at the TOP, it can be moved down, left or right
            acceptable_top: bool = from_pos[0] == 0 and (
                slide == Move.BOTTOM
                or slide == Move.LEFT
                or slide == Move.RIGHT
            )
            # if it is at the BOTTOM, it can be moved up, left or right
            acceptable_bottom: bool = from_pos[0] == 4 and (
                slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT
            )
            # if it is on the LEFT, it can be moved up, down or right
            acceptable_left: bool = from_pos[1] == 0 and (
                slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT
            )
            # if it is on the RIGHT, it can be moved up, down or left
            acceptable_right: bool = from_pos[1] == 4 and (
                slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT
            )
        # if the piece position is in a corner
        else:
            # if it is in the upper left corner, it can be moved to the right and down
            acceptable_top: bool = from_pos == (0, 0) and (
                slide == Move.BOTTOM or slide == Move.RIGHT
            )
            # if it is in the lower left corner, it can be moved to the right and up
            acceptable_left: bool = from_pos == (4, 0) and (
                slide == Move.TOP or slide == Move.RIGHT
            )
            # if it is in the upper right corner, it can be moved to the left and down
            acceptable_right: bool = from_pos == (0, 4) and (
                slide == Move.BOTTOM or slide == Move.LEFT
            )
            # if it is in the lower right corner, it can be moved to the left and up
            acceptable_bottom: bool = from_pos == (4, 4) and (
                slide == Move.TOP or slide == Move.LEFT
            )
        # check if the move is acceptable
        acceptable: bool = (
            acceptable_top
            or acceptable_bottom
            or acceptable_left
            or acceptable_right
        )
        # if it is
        if acceptable:
            # take the piece
            piece = self._board[from_pos]
            # if the player wants to slide it to the left
            if slide == Move.LEFT:
                # for each column starting from the column of the piece and moving to the left
                for i in range(from_pos[1], 0, -1):
                    # copy the value contained in the same row and the previous column
                    self._board[(from_pos[0], i)] = self._board[
                        (from_pos[0], i - 1)
                    ]
                # move the piece to the left
                self._board[(from_pos[0], 0)] = piece
            # if the player wants to slide it to the right
            elif slide == Move.RIGHT:
                # for each column starting from the column of the piece and moving to the right
                for i in range(from_pos[1], self._board.shape[1] - 1, 1):
                    # copy the value contained in the same row and the following column
                    self._board[(from_pos[0], i)] = self._board[
                        (from_pos[0], i + 1)
                    ]
                # move the piece to the right
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            # if the player wants to slide it upward
            elif slide == Move.TOP:
                # for each row starting from the row of the piece and going upward
                for i in range(from_pos[0], 0, -1):
                    # copy the value contained in the same column and the previous row
                    self._board[(i, from_pos[1])] = self._board[
                        (i - 1, from_pos[1])
                    ]
                # move the piece up
                self._board[(0, from_pos[1])] = piece
            # if the player wants to slide it downward
            elif slide == Move.BOTTOM:
                # for each row starting from the row of the piece and going downward
                for i in range(from_pos[0], self._board.shape[0] - 1, 1):
                    # copy the value contained in the same column and the following row
                    self._board[(i, from_pos[1])] = self._board[
                        (i + 1, from_pos[1])
                    ]
                # move the piece down
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable

# Players

In [5]:
from collections import defaultdict
import random

In [6]:
def random_move() -> tuple[tuple[int, int], Move]:
    from_pos = (random.randint(0, 4), random.randint(0, 4))
    move = random.choice([Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT])
    return from_pos, move


class RandomPlayer(Player):
    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: "Game") -> tuple[tuple[int, int], Move]:
        return random_move()

# Train.py

IN this section we would like to decide the player neural network architecture and then we would like to train it against a random player and see how it performors.

In [7]:
import pandas as pd
from collections import deque
from numpy import unravel_index
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [8]:
class From_Pos_Net(torch.nn.Module):
    def __init__(self, gamma=0.9):
        super().__init__()
        self.optimizer = None
        self.gamma = gamma
        self.p1 = nn.Linear(
            26, 100
        )  # 26 because we have 25 integers and 1 int for player id
        self.p2 = nn.Linear(100, 100)
        self.p3 = nn.Linear(100, 100)
        self.p4 = nn.Linear(100, 25)
        self.device = "cpu"

    def forward(self, x):
        x = torch.tensor(x, dtype=torch.float32).to(self.device)
        x = F.relu(self.p1(x))
        x = F.relu(self.p2(x))
        x = F.relu(self.p3(x))
        from_pos = F.softmax(self.p4(x), dim=-1)

        # During the train step he should learn to take the from_pos using the professor convention

        return from_pos

    def train_net(self, state, next_state, from_pos, target):
        if target != 1:
            target += self.gamma * torch.max(self.forward(next_state))

        output = self.forward(state)
        target_f = output.clone()
        target_f[np.argmax(from_pos)] = target
        target_f.detach()
        self.optimizer.zero_grad()
        loss = F.mse_loss(output, target_f)
        loss.backward()
        self.optimizer.step()

In [9]:
class Action_Net(torch.nn.Module):
    def __init__(self, gamma=0.9):
        super().__init__()
        self.optimizer = None
        self.gamma = gamma
        # network for the action
        self.a1 = nn.Linear(27, 50)
        self.a2 = nn.Linear(50, 50)
        self.a3 = nn.Linear(50, 25)
        self.a4 = nn.Linear(25, 4)
        self.device = "cpu"

    def forward(self, y):
        y = torch.tensor(y, dtype=torch.float32).to(self.device)
        y = F.relu(self.a1(y))
        y = F.relu(self.a2(y))
        y = F.relu(self.a3(y))
        act = F.softmax(self.a4(y), dim=-1)

        return act

    def train_net(self, state, next_state, action, target):
        if target != 1:
            target += self.gamma * torch.max(self.forward(next_state))

        output = self.forward(state)
        target_f = output.clone()
        target_f[np.argmax(action)] = target
        target_f.detach()
        self.optimizer.zero_grad()
        loss = F.mse_loss(output, target_f)
        loss.backward()
        self.optimizer.step()
        return

In [10]:
class DeepQTrain(Player):
    def __init__(self, params):
        self._epsilon = 0.3
        self.train_mode = True
        self.file_name = "si andiamo in exploration"
        self.learning_rate = params["learning_rate"]
        self.weight_path = params["weight_path"]
        self.memory = []
        self.from_pos_net = From_Pos_Net()
        self.from_pos_net.optimizer = optim.Adam(
            self.from_pos_net.parameters(),
            weight_decay=0,
            lr=self.learning_rate,
        )
        self.action_net = Action_Net()
        self.action_net.optimizer = optim.Adam(
            self.action_net.parameters(), weight_decay=0, lr=self.learning_rate
        )

        DEVICE = "cpu"
        self.from_pos_net.to(DEVICE)
        self.action_net.to(DEVICE)

    def set_epsilon(self, eps: int) -> None:
        self._epsilon = eps

    def get_epsilon(self) -> int:
        return self._epsilon

    def make_move(self, game: "Game") -> tuple[tuple[int, int], Move]:
        input = game.get_board()
        from_pos_net_input = np.append(input, game.get_current_player())

        if self.train_mode and np.random.random() < self._epsilon:
            possible_moves = game.get_possible_moves()
            index = np.random.choice(len(possible_moves))
            move = possible_moves[index]
        else:
            from_pos = self.from_pos_net(from_pos_net_input)
            from_pos = from_pos.reshape(5, 5)  # we reshape it in matrix form.
            from_pos = unravel_index(
                from_pos.cpu().argmax(), from_pos.shape
            )  # we take the position of the matrix we are interested into.
            # this will be part of the additional input to the action net.
            act_net_input = np.append(input, from_pos)
            act = self.action_net.forward(act_net_input)
            act = act.cpu().argmax().numpy()

            move = (from_pos, Move(act))

        # vorrei la memoria fatta da (board_state+player, (move, act))
        self.memory.append((from_pos_net_input, move))

        return move

    # Now we need to design the train function.
    # It needs to assign the reward, and the idea should be like more important to the last move, less important
    # to the first move.
    # The for each move, we have the reward and we use the function from below.
    # To assigna the reward value we will use the Q_value update? Yes probably.
    def back_prop(self, reward: int):
        for idx in range(len(self.memory) - 1):
            curr_state, move = self.memory[idx]
            curr_pos, curr_move = move
            next_state, n_move = self.memory[idx + 1]
            next_pos, _ = n_move
            r = 0 if idx < len(self.memory) - 2 else reward
            self.from_pos_net.train_net(curr_state, next_state, curr_pos, r)
            self.action_net.train_net(
                np.append(curr_state[:-1], curr_pos),
                np.append(next_state[:-1], next_pos),
                curr_move,
                r,
            )

        self.memory = []

    def save_policy(self):
        from_pos_weights = self.from_pos_net.state_dict()
        act_weights = self.action_net.state_dict()
        torch.save(from_pos_weights, "from_pos_net.h5")
        torch.save(act_weights, "action_net.h5")
        pass







### New Game subclass

In [11]:
from tqdm import tqdm

In [12]:
class GameTrainer(Game):
    def __init__(self) -> None:
        super().__init__()

    def print(self) -> None:
        # os.system("cls||clear")
        pass

    def __acceptable_slides(self, from_position: tuple[int, int]):
        """When taking a piece from {from_position} returns the possible moves (slides)"""
        acceptable_slides = [Move.BOTTOM, Move.TOP, Move.LEFT, Move.RIGHT]
        axis_0 = from_position[0]  # axis_0 = 0 means uppermost row
        axis_1 = from_position[1]  # axis_1 = 0 means leftmost column

        if axis_0 == 0:  # can't move upwards if in the top row...
            acceptable_slides.remove(Move.TOP)
        elif axis_0 == 4:
            acceptable_slides.remove(Move.BOTTOM)

        if axis_1 == 0:
            acceptable_slides.remove(Move.LEFT)
        elif axis_1 == 4:
            acceptable_slides.remove(Move.RIGHT)
        return acceptable_slides

    def get_possible_moves(self):
        # __acceptable_slides -> prende from_pos e ritorna le slides possibili.
        # for solo sugli element di contorno. e prendiamo le posizion. poi abbiamo acceptable_slides che ci dice le slide possivbili.
        moves = []
        for row in [0, 4]:
            for col in range(5):
                if (
                    self._board[row, col] == self.current_player_idx
                    or self._board[row, col] == -1
                ):
                    slides = self.__acceptable_slides((row, col))
                    for slide in slides:
                        moves.append(((col, row), slide))
                if (
                    self._board[col, row] == self.current_player_idx
                    or self._board[col, row] == -1
                ):
                    slides = self.__acceptable_slides((col, row))
                    for slide in slides:
                        moves.append(((row, col), slide))
        return moves

    def play(self, player1: Player, player2: Player) -> int:
        self._board = np.full((5, 5), -1, dtype=np.int8)
        self.current_player_idx = -1
        players = [player1, player2]
        winner = -3
        n_move = 0
        while winner < 0 and n_move < 150:
            self.current_player_idx += 1
            self.current_player_idx %= len(players)
            ok = False
            in_loop = 0
            while not ok:
                in_loop += 1
                from_pos, slide = players[self.current_player_idx].make_move(
                    self
                )
                ok = self._Game__move(from_pos, slide, self.current_player_idx)
                if in_loop > 200:
                    pass
            n_move += 1
            winner = self.check_winner()
        return winner

    # look at the problem of the starting position.
    # The idea could be to leave the play as it is, then we can make our players play as first then as second.
    # we can also make array players shuffle. -> i'll go with this solution
    def train(self, trainee: Player, trainer: Player, epochs: int) -> None:
        if not trainee.file_name:
            print("starting full exploration mode")
            trainee.set_epsilon(1)
        # if isinstance(trainer, RLayer) and not trainer.file_name:
        # trainer.set_epsilon(1)
        players = [trainee, trainer]
        winning_reward = 1
        losing_reward = -3
        first_draw_reward = 0.1
        second_draw_reward = 0.5
        bar = tqdm(total=epochs, desc="Epoch")
        for ep in range(epochs):
            if ep % (epochs // 30) == 0:
                old_eps = trainee.get_epsilon()
                new_eps = old_eps - 0.1 if old_eps > 0.3 else old_eps
                # eps decrease at the same rate for both.
                trainee.set_epsilon(new_eps)
                # if isinstance(trainer, RLayer):
                # trainer.set_epsilon(new_eps)
            np.random.shuffle(players)
            winner_idx = self.play(players[0], players[1])
            loser_idx = (winner_idx + 1) % 2
            if winner_idx != -1:
                if isinstance(players[winner_idx], DeepQTrain):
                    players[winner_idx].back_prop(winning_reward)
                if isinstance(players[loser_idx], DeepQTrain):
                    players[loser_idx].back_prop(losing_reward)
            else:
                if isinstance(players[0], DeepQTrain):
                    # if first start draws, not very good.
                    players[0].back_prop(first_draw_reward)
                if isinstance(players[1], DeepQTrain):
                    # if second starting draws, good for him
                    players[1].back_prop(second_draw_reward)
            bar.update(1)
        if isinstance(trainee, DeepQTrain):
            trainee.save_policy()
        # if isinstance(trainer, RLayer):
        #     trainer.save_policy()
        return

# Prova

In [13]:
game = GameTrainer()
trainee = DeepQTrain({"learning_rate": 0.001, "weight_path": "ciao"})
trainer = RandomPlayer()

In [14]:
game.train(trainee, trainer, 100_000)

Epoch: 100%|██████████| 100000/100000 [2:00:39<00:00, 13.81it/s] 


In [15]:
trainee.is_training = False
n_game = 1000

print("starting evaluation")
bar = tqdm(total=n_game * 2, desc="Game #")

wins_as_first = 0
for _ in range(n_game):
    winner = game.play(trainee, RandomPlayer())
    if winner == 0:
        wins_as_first += 1
    bar.update(1)


wins_as_second = 0
for _ in range(n_game):
    winner = game.play(RandomPlayer(), trainee)
    if winner == 1:
        wins_as_second += 1
    bar.update(1)

print("")
print(f"Wins as first: {wins_as_first/n_game:.2f}%")
print(f"Wins as second: {wins_as_second/n_game:.2f}%")

print(f"total percentage: {(wins_as_first + wins_as_second)/(n_game*2):.2f}%")

starting evaluation


Game #: 100%|█████████▉| 1997/2000 [00:15<00:00, 133.66it/s]


Wins as first: 0.45%
Wins as second: 0.40%
total percentage: 0.42%


Game #: 100%|██████████| 2000/2000 [00:26<00:00, 133.66it/s]