Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB10

Use reinforcement learning to devise a tic-tac-toe player.

### Deadlines:

* Submission: [Dies Natalis Solis Invicti](https://en.wikipedia.org/wiki/Sol_Invictus)
* Reviews: [Befana](https://en.wikipedia.org/wiki/Befana)

Notes:

* Reviews will be assigned on Monday, December 4.

In [4]:
from IPython.display import clear_output
import numpy as np
from termcolor import colored
from tqdm import tqdm
from collections import defaultdict
from copy import deepcopy

# TicTacToe

In [None]:
class TicTacToe:
    """A 3x3 tic-tac-toe board stored in a NumPy array.

    Board legend:
        cell = 0  -> no move on this cell
        cell = 1  -> player 1 made 'x' (1) as move
        cell = -1 -> player 2 made 'o' (-1) as move
    """

    def __init__(self, board=None):
        """Create a game, optionally starting from an existing board.

        Args:
            board: a ``np.ndarray`` to use as the board. It is stored as-is
                (not copied), so later moves mutate the caller's array.
                Any other value (including ``None``) yields an empty board.
        """
        if isinstance(board, np.ndarray):
            self.board = board
        else:  # all cells initialized at 0
            self.board = np.zeros(shape=(3, 3))

    def hash(self) -> str:
        """Return the string form of the board, usable as a dictionary key."""
        return str(self.board)

    def check_win(self):
        """Return True if any row, column or diagonal is filled by one player.

        A line is won when its cells sum to +3 (all 'x') or -3 (all 'o').
        """
        line_sums = np.concatenate(
            (
                self.board.sum(axis=0),  # columns
                self.board.sum(axis=1),  # rows
                [
                    np.trace(self.board),  # main diagonal
                    np.trace(np.fliplr(self.board)),  # anti-diagonal
                ],
            )
        )
        return bool((np.abs(line_sums) == 3).any())

    def check_tie(self) -> bool:
        """Return True when no empty cell is left.

        This only looks at free cells; it does not check whether a player
        has already won.
        """
        return not self.possible_moves()

    def make_move(self, move: tuple[int, int], value: int):
        """Place ``value`` on the cell addressed by ``move``.

        Args:
            move: ``(x, y)`` indices into the board; both must be in 0..2.
            value: the symbol to write (1 for 'x', -1 for 'o').

        Returns:
            True if the move was applied. False if it was rejected because
            it is out of range or the cell is already occupied; in that case
            "invalid move" is printed and the board is left untouched.
        """
        x, y = move
        # The range test short-circuits before the cell is indexed.
        if not (0 <= x <= 2 and 0 <= y <= 2) or self.board[x][y] != 0:
            print("invalid move")
            return False
        self.board[x][y] = value
        return True

    def possible_moves(self):
        """Return all empty cells as ``(row, col)`` tuples in row-major order."""
        return [(int(i), int(j)) for i, j in np.argwhere(self.board == 0)]

    def print(self):
        """Clear the notebook output and draw the board with coloured symbols."""

        def render(cell):
            # 0 is an empty cell, 1 is 'X', anything else is drawn as 'O'.
            if cell == 0:
                return "   "
            if cell == 1:
                return colored(" X ", "red")
            return colored(" O ", "green")

        l_len = 19
        clear_output()
        print(f"{colored('Player 1', 'green')} make your move:\n")
        print("   ", "  (0)   (1)   (2)")
        for i in range(3):
            print("   ", "-" * l_len)
            print(f"({i})", "| ", end="")
            print(" | ".join(map(render, self.board[i].astype(int))), end=" ")
            print("|")
        print("   ", "-" * l_len)

# Players

In [5]:
from abc import ABC, abstractmethod


class Player(ABC):
    """Abstract base class defining the interface of a tic-tac-toe player."""

    def __init__(self) -> None:
        """Do nothing by default.

        Subclasses that need to keep state or memory can override this.
        """

    @abstractmethod
    def decide_move(
        self, game: "TicTacToe", move_symbol: int
    ) -> tuple[int, int]:
        """Choose the next move for this player.

        Args:
            game: the TicTacToe game. A player may work on its own copy,
                but everything is evaluated by the main game.
            move_symbol: the symbol this player places on the board.

        Returns:
            A tuple with the X, Y positions of the chosen cell.
        """

    @abstractmethod
    def back_prop(self, reward: int):
        """Handle a reward passed to the player.

        NOTE(review): presumably called once a game is over so that learning
        players can update themselves -- confirm against the caller.
        """

In [6]:
class RandomPlayer(Player):
    """Player that picks one of the currently available moves at random."""

    def __init__(self) -> None:
        # Counts how many times each resulting board (keyed by its hash
        # string) was reached by a move chosen by this player.
        self.pol = defaultdict(float)
        super().__init__()

    def decide_move(
        self, game: "TicTacToe", move_symbol: int
    ) -> tuple[int, int]:
        """Pick a random legal move and record the board it leads to.

        The move is tried on a deep copy of ``game``; the game passed in
        is not modified.

        Args:
            game: the current game, queried for its possible moves.
            move_symbol: the symbol this player places on the board.

        Returns:
            The chosen move, as returned by ``game.possible_moves()``.

        Raises:
            ValueError: raised by ``np.random.choice`` when no move is left.
        """
        all_moves = game.possible_moves()
        index = np.random.choice(len(all_moves))
        move = all_moves[index]
        board = deepcopy(game)
        board.make_move(move, move_symbol)
        # Call hash(): keying on the bound method itself would create a new
        # entry on every call and keep every copied board alive.
        self.pol[board.hash()] += 1
        return move

    def back_prop(self, reward: int):
        """Ignore the reward; a random player has nothing to learn."""
        return super().back_prop(reward)

In [13]:
class HumanPlayer(Player):
    """Player whose moves are typed in interactively."""

    def __init__(self) -> None:
        super().__init__()

    def decide_move(
        self, game: "TicTacToe", move_symbol: int
    ) -> tuple[int, int]:
        """Prompt the user for a move and return it.

        Args:
            game: the current game (not used to validate the input here).
            move_symbol: the symbol being played; -1 is announced in green,
                any other value in red.

        Returns:
            The ``(x, y)`` coordinates typed by the user, as integers.

        Raises:
            ValueError: if the input is not exactly two integers.
        """
        color = "green" if move_symbol == -1 else "red"
        print(f"{colored('Human player', color)} it's your turn")
        move = input("Insert x and y coordinates in format: 'x y' :")
        # split() without arguments tolerates leading, trailing and repeated
        # whitespace, which split(" ") rejected with a ValueError.
        x, y = move.split()
        return (int(x), int(y))

    def back_prop(self, reward: int):
        """Ignore the reward; a human player keeps no learned state."""
        return super().back_prop(reward)