Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB10

Use reinforcement learning to devise a tic-tac-toe player.

### Deadlines:

* Submission: [Dies Natalis Solis Invicti](https://en.wikipedia.org/wiki/Sol_Invictus)
* Reviews: [Befana](https://en.wikipedia.org/wiki/Befana)

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit at all)

In [34]:
from itertools import permutations
import numpy as np
from termcolor import colored

In [2]:
from abc import ABC, abstractmethod


class Player(ABC):
    """Abstract base class for a tic-tac-toe player."""

    def __init__(self) -> None:
        """Set up the player; override it if your player needs to keep state or memory."""

    @abstractmethod
    def make_move(self, game: "TicTacToe") -> tuple[int, int]:
        """Choose the next move for the given game.

        game: the TicTacToe game being played; inspect it (for example its
            board or its possible moves) to decide what to play.
        return value: the chosen cell as a pair of board indices, in the
            format accepted by TicTacToe.make_move.
        """

In [91]:
class TicTacToe:
    """A 3x3 tic-tac-toe board together with the logic needed to play a game.

    Board legend:
    cell = 0 -> no move on this cell
    cell = 1 -> player 1 made 'x' (1) as move
    cell = -1 -> player 2 made 'o' (-1) as move
    """

    def __init__(self, board=None):
        """Create a game, optionally starting from an existing position.

        board: a 3x3 array-like following the board legend, or None to start
            from an empty board (all cells initialized at 0).
        """
        if board is None:
            self.board = np.zeros(shape=(3, 3))
        else:
            # Work on a private copy so that moves played on this game never
            # modify the array owned by the caller.
            self.board = np.array(board)
        # Board snapshots recorded by game(), one per accepted move.
        self.history = []

    def check_win(self):
        """Return True if either player has completed a row, column or diagonal."""
        # A line is won when its three cells hold the same non-zero mark,
        # i.e. when the line sums to +3 or -3.
        line_sums = np.concatenate(
            (
                self.board.sum(axis=0),  # columns
                self.board.sum(axis=1),  # rows
                [
                    np.trace(self.board),  # main diagonal
                    np.trace(np.fliplr(self.board)),  # anti-diagonal
                ],
            )
        )
        return bool((np.abs(line_sums) == 3).any())

    def make_move(self, player: int, move: tuple[int, int]):
        """Place the mark of `player` on the cell given by `move`.

        player: the mark to write on the board (1 or -1, see the board legend).
        move: the two board indices of the target cell.
        return value: True if the mark was placed; False if the move was
            rejected because the cell is outside the board or already taken
            (in that case "invalid move" is printed and the board is untouched).
        """
        x, y = move
        inside_board = 0 <= x <= 2 and 0 <= y <= 2
        # The occupancy test is only evaluated for cells inside the board.
        if not inside_board or self.board[x][y] != 0:
            print("invalid move")
            return False
        self.board[x][y] = player
        return True

    def possible_moves(self):
        """Return the index pairs of all the cells that are still empty."""
        return [(i, j) for i in range(3) for j in range(3) if self.board[i][j] == 0]

    def print(self):
        """Print the board with row/column indices, X in red and O in green."""
        l_len = 19
        separator = '-' * l_len

        def render(cell):
            if cell == 0:
                return "   "
            if cell == 1:
                return colored(" X ", "red")
            return colored(" O ", "green")

        print('   ', '  (0)   (1)   (2)')
        for i in range(3):
            print('   ', separator)
            print(f'({i})', '| ', end='')
            print(" | ".join(render(cell) for cell in self.board[i].astype(int)), end=' ')
            print('|')
        print('   ', separator)

    def game(self, player1: "Player", player2: "Player"):
        """Play a full game between two players, starting from an empty board.

        player1 moves first and plays 1 ('x'); player2 plays -1 ('o'). On its
        turn each player is asked for a move through `make_move(self)`. The
        board reached after every accepted move is appended to `self.history`.

        return value: 1 if player1 wins, -1 if player2 wins, 0 for a tie.
            A player that proposes an invalid move forfeits the game, so the
            opponent is returned as the winner.
        """
        self.board = np.zeros(shape=(3, 3))
        self.history = []
        contenders = ((1, player1), (-1, player2))

        # Every accepted move fills exactly one cell, so the game cannot last
        # longer than the number of cells on the board.
        for turn in range(self.board.size):
            mark, player = contenders[turn % 2]
            move = player.make_move(self)
            if not self.make_move(mark, move):
                # Asking again could loop forever with a player that keeps
                # proposing the same illegal move, so the game ends here.
                return -mark
            self.history.append(self.board.copy())
            if self.check_win():
                return mark

        # The board is full and nobody completed a line.
        return 0

In [79]:
# Sample position: X (1) fills the main diagonal, O (-1) the three cells above it.
board_init = np.array(
    [
        [1, -1, -1],
        [0, 1, -1],
        [0, 0, 1],
    ]
)

In [92]:
# Load the sample position into a game, draw it, and test it for a winner.
game = TicTacToe(board=board_init)
game.print()
# Last expression of the cell, so the notebook shows its value as the output.
game.check_win()

      (0)   (1)   (2)
    -------------------
(0) | [31m X [0m | [32m O [0m | [32m O [0m |
    -------------------
(1) |     | [31m X [0m | [32m O [0m |
    -------------------
(2) |     |     | [31m X [0m |
    -------------------


True