# Using RL to learn to Checkmate

This will be much more in depth: using a much larger environment (based on the number of moves there are at any given time). We're still going to use the techniques discussed: game-states, movement, and a q-table.

***In order to make the q-table a reasonable size, we are going to use a 4x4 board instead of 8x8 since every piece location combination needs to be recorded.***


First off, we need to code in the Chess aspect. Since we are making an abbreviated game of chess (with limited pieces), we'll make our game set only have the pieces we want to use. This can later be expanded.
- Board
  - 8x8 matrix
- King
  - Possible moves: One space in any direction (horizontal / diagonal)
- Rook
  - Possible moves: 1-3 spaces in each horizontal direction (N-S-E-W)
  
Rules:
- Check
  - The king is being attacked.
- Checkmate
  - End of the game.
  
***

*In each game position, the agent will make either a king move or a rook move. Since some of those moves will be illegal (the piece would end up in check, or off the board, or going through another piece!), there will need to be some sort of mask which prohibits the agent from choosing these.*

In [26]:
import numpy as np
import matplotlib.pyplot as plt

In [27]:
np.random.seed(2021)
plt.style.use(['ggplot', 'seaborn-talk'])

In [28]:
class Board:
    def __init__(self, setup_with=list(), n_squares=4):
        self.n_squares = n_squares
        self.board = np.zeros((self.n_squares, self.n_squares), dtype='object')
        for p in setup_with:
            self.board[tuple(p.position)] = p
        
    def __str__(self):
        for x in range(self.n_squares):
            for y in range(self.n_squares):
                if self.board[(x, y)]:
                    print(f' {self.board[(x, y)]} ', end='')
                else:
                    print(' . ', end='')
            print()
        return ''
    
    def __repr__(self):
        self.__str__()
        return ''
    
    def in_bounds(self, coord):
        y, x = coord
        if (x < 0) | (x >= self.n_squares) | (y < 0) | (y >= self.n_squares):
            return False
        return True
    
    def is_occupied(self, coord):
        if self.board[coord]:
            return True
        return False
        
        
class Piece:
    def __init__(self, position, is_white: bool):
        """Position can be coordinates `(inv(y), x)` or string (`e4`)."""
        if isinstance(position, str):
            self.position = self.__interpret_position_string(position)
        else:
            self.position = np.array(position)
        self.is_white = is_white
        
    def __interpret_position_string(self, s):
        """
        Board positions like `e4` or `f6`. 
        Converted to coordinates (y, x):
        
        'a8' -> (0, 0)
        'a1' -> (7, 0)
        'h1' -> (7, 7)
        """
        
        file_dct = {
            'a': 0,
            'b': 1,
            'c': 2,
            'd': 3
        }
        return np.array([4-int(s[1]), file_dct[s[0]]])
        
class King(Piece):
    def __init__(self, position, is_white):
        super().__init__(position, is_white)
        
    def __str__(self):
        if self.is_white:
            return 'K'
        return 'k'
    
    def __repr__(self):
        return self.__str__()
    
    def all_possible_moves(self):
        # Moves one space any direction.
        movements = [np.array([x, y]) 
                     for x in range(-1, 1+1) 
                     for y in range(-1, 1+1)]
        
        # Remove standing still
        movements = [m for m in movements if not all(m == np.array([0, 0]))]
        return np.array(movements)
    
    def possible_legal(self, game):
        enemy_king = [p for p in game.player.pieces + game.computer.pieces
                      if isinstance(p, King) and p != self][0]
        adj_to_enemy_king = [tuple(enemy_king.position + n) 
                             for n in enemy_king.all_possible_moves()]
        return [tuple(m) for m in self.all_possible_moves()
                if game.board.in_bounds(tuple(self.position + m))
                and not game.board.is_occupied(tuple(self.position + m))
                and tuple(self.position + m) not in adj_to_enemy_king]
    
    def controls(self, game):
        return [tuple(m) for m in self.all_possible_moves()
                if game.board.in_bounds(tuple(self.position + m))]
    
    
class Rook(Piece):
    def __init__(self, position, is_white):
        super().__init__(position, is_white)
        
    def __str__(self):
        if self.is_white:
            return 'R'
        return 'r'
    
    def __repr__(self):
        return self.__str__()
    
    def all_possible_moves(self):
        # Moves up to 3 spaces in all directions (n-e-s-w)
        movements = [np.array([y, 0]) for y in range(-3, 3+1)] + \
                    [np.array([0, x]) for x in range(-3, 3+1)]
        
        # Remove standing still.
        movements = [m for m in movements if not all(m == np.array([0, 0]))]
        return np.array(movements)
    
    def possible_legal(self, game):
        white_king_sq, white_rook_sq, black_king_sq = game.get_game_state()
        possible_moves = []
        for i in range(-1, -4, -1):
            new_square = tuple(self.position + (i, 0))
            if new_square == black_king_sq:
                possible_moves.append((i, 0))
                break
            elif new_square == white_king_sq:
                break
            possible_moves.append((i, 0))
        for i in range(-1, -4, -1):
            new_square = tuple(self.position + (0, i))
            if new_square == black_king_sq:
                possible_moves.append((i, 0))
                break
            elif new_square == white_king_sq:
                break
            possible_moves.append((0, i))

        for i in range(1, 4):
            new_square = tuple(self.position + (i, 0))
            if new_square == black_king_sq:
                possible_moves.append((i, 0))
                break
            elif new_square == white_king_sq:
                break
            possible_moves.append((i, 0))
        for i in range(1, 4):
            new_square = tuple(self.position + (0, i))
            if new_square == black_king_sq:
                possible_moves.append((0, i))
                break
            elif new_square == white_king_sq:
                break
            possible_moves.append((0, i))
            
        return [m for m in possible_moves if 
                game.board.in_bounds(tuple(self.position + m))]
    
    def controls(self, game):
        return self.possible_legal(game)
    
    
class Player:
    def __init__(self, pieces=list()):
        self.pieces = pieces
        
        
class Game:
    def __init__(self, white_pieces: list, black_pieces: list, board_size=4):
        self.player = Player(white_pieces)
        self.computer = Player(black_pieces)
        self.board = Board(self.player.pieces + self.computer.pieces, 
                           board_size)
        
    def get_game_state(self):
        """
        Return piece positions. 
            (w-king, w-rook, b-king)
        """
        
        pieces_dct = {
            'w-king': None,
            'w-rook': None,
            'b-king': self.computer.pieces[0].position
        }
        
        if isinstance(self.player.pieces[0], King):
            pieces_dct['w-king'] = self.player.pieces[0].position
            pieces_dct['w-rook'] = self.player.pieces[1].position
        else:
            pieces_dct['w-king'] = self.player.pieces[1].position
            pieces_dct['w-rook'] = self.player.pieces[0].position
        return (tuple(pieces_dct.get('w-king', (0, 0))), 
                tuple(pieces_dct.get('w-rook', (0, 0))), 
                tuple(pieces_dct.get('b-king', (0, 0))))
    
    def controlled_by(self, color: str):
        """
        Squares (coordinates) that `color`-player controls.
        `color` can be one of ('w', 'b', 'white', 'black')
        """
        
        if color.lower() in ('w', 'white'):
            side = self.player
        elif color.lower() in ('b', 'black'):
            side = self.computer
            
        all_possible = []
        for p in side.pieces:
            all_possible += [tuple(p.position + a) 
                             for a in p.controls(self)]
            
        return set([coord for coord in all_possible 
                    if self.board.in_bounds(coord)])
    
    def move(self, piece, action):
        starting_coord = piece.position
        ending_coord = tuple(starting_coord + action)
        self.board.board[starting_coord] = None
        self.board.board[ending_coord] = piece
        piece.position = np.array(ending_coord)
        
    
    def __nowhere_to_run(self):
        """Returns if the black king has no legal movement squares."""
        king = self.computer.pieces[0]
        king_possible = set(
            [tuple(king.position + a) for a in king.possible_legal(self)
             if self.board.in_bounds(tuple(king.position + a))])
        white_controlled = self.controlled_by('w')
        if not king_possible - white_controlled:
            return True
        return False
    
    def __nowhere_to_hide(self):
        """
        Returns if the black king is standing on a square controlled by 
        the opponent.
        """
        king = self.computer.pieces[0]
        return tuple(king.position) in self.controlled_by('w')
    
    def is_stalemate(self):
        """Returns bool if the board state is stalemate."""
        return self.__nowhere_to_run() and not self.__nowhere_to_hide()
    
    def is_checkmate(self):
        """Returns bool if the board state is checkmate."""
        return self.__nowhere_to_run() and self.__nowhere_to_hide()
    
    
def convert_coord_to_square(coord):
    """Returns the string (eg: `a1`) based on a coordinate."""
    rank, file = coord
    file_dct = {
        0: 'a',
        1: 'b',
        2: 'c',
        3: 'd'
    }
    return str(file_dct[file]) + str(4 - rank)

In [29]:
game = Game(
    white_pieces=[King('c1', 1), Rook('a4', 1)],
    black_pieces=[King('d4', 0)],
    board_size=4
)
print('*GAME BOARD*')
print('************')
print(game.board)
for p in game.player.pieces:
    print(p)
    print('Legal Moves:')
    print(p.possible_legal(game))
    print()
print('Controlled by white:', game.controlled_by('w'))

*GAME BOARD*
************
 R  .  .  k 
 .  .  .  . 
 .  .  .  . 
 .  .  K  . 

K
Legal Moves:
[(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1)]

R
Legal Moves:
[(1, 0), (2, 0), (3, 0), (0, 1), (0, 2), (0, 3)]

Controlled by white: {(0, 1), (3, 3), (3, 0), (3, 1), (2, 1), (2, 0), (2, 3), (2, 2), (1, 0), (0, 2), (0, 3)}


In [30]:
game.move(game.player.pieces[0], (-1,-1))
game.board

 R  .  .  k 
 .  .  .  . 
 .  K  .  . 
 .  .  .  . 




In [31]:
game.move(game.player.pieces[0], (-1,-1))
game.board

 R  .  .  k 
 K  .  .  . 
 .  .  .  . 
 .  .  .  . 




In [32]:
game.move(game.player.pieces[0], (1, 0))
game.board

 .  .  .  . 
 .  .  .  . 
 K  .  .  . 
 .  .  .  . 




***The q-table lookup will need a mapping to the pieces and the movements. The actions will look like this:***
```
possible_movements: {
    0:  ``K(-1, -1)``,
    1:  ``K(-1,  0)``,
    2:  ``K(-1,  1)``,
    3:  ``K( 0, -1)``,
    4:  ``K( 0,  1)``,
    5:  ``K( 1, -1)``,
    6:  ``K( 1,  0)``,
    7:  ``K( 1,  1)``,
    
    8:  ``R(-3,  0)``,
    9:  ``R(-2,  0)``,
    10: ``R(-1,  0)``,
    11: ``R( 1,  0)``,
    12: ``R( 2,  0)``,
    13: ``R( 3,  0)``,
    14: ``R( 0, -3)``,
    15: ``R( 0, -2)``,
    16: ``R( 0, -1)``,
    17: ``R( 0,  1)``,
    18: ``R( 0,  2)``,
    19: ``R( 0,  3)``
}
```

***The first 8 actions are king moves, the next 12 actions are rook moves.***

In [5]:
BOARD_SIZE = 4

In [6]:
game = Game(
    white_pieces=[King('d1', 1), Rook('a1', 1)],
    black_pieces=[King('d4', 0)],
    board_size=BOARD_SIZE
)
game.board

 .  .  .  k 
 .  .  .  . 
 .  .  .  . 
 R  .  .  K 




In [7]:
# Show what white controls.
sorted(
    list(
        zip(
            [convert_coord_to_square(c) for c in game.controlled_by('w')],
            [c for c in game.controlled_by('w')]
        )
    )
)

[('a2', (2, 0)),
 ('a3', (1, 0)),
 ('a4', (0, 0)),
 ('b1', (3, 1)),
 ('c1', (3, 2)),
 ('c2', (2, 2)),
 ('d2', (2, 3))]

In [8]:
# Check stalemate logic.
game = Game(
    white_pieces=[King('d2', 1), Rook('c1', 1)],
    black_pieces=[King('d4', 0)],
    board_size=BOARD_SIZE
)
print(game.board)
print('Checkmate:\n\t', game.is_checkmate())
print('Stalemate:\n\t', game.is_stalemate())

 .  .  .  k 
 .  .  .  . 
 .  .  .  K 
 .  .  R  . 

Checkmate:
	 False
Stalemate:
	 True


In [9]:
# Check checkmate logic.
game = Game(
    white_pieces=[King('d2', 1), Rook('a4', 1)],
    black_pieces=[King('d4', 0)],
    board_size=BOARD_SIZE
)
print(game.board)
print('Checkmate:\n\t', game.is_checkmate())
print('Stalemate:\n\t', game.is_stalemate())

 R  .  .  k 
 .  .  .  . 
 .  .  .  K 
 .  .  .  . 

Checkmate:
	 True
Stalemate:
	 False


In [10]:
# Get game state.
game.get_game_state()

((2, 3), (0, 0), (0, 3))

In [None]:
POSSIBLE_LOCATIONS = [(x, y) for x in range(4) for y in range(4)]

# Create q-table.
q_table = {}

for x in POSSIBLE_LOCATIONS:
    for y in POSSIBLE_LOCATIONS:
        for z in POSSIBLE_LOCATIONS:
            if (x != y) & (x != z) & (y != z):
                print((x, y, z))
#                 q_table[(x, y, z)] = 

In [None]:
tuple(Rook('a1', 1).position)

In [None]:
np.zeros((1, 8, 12))

In [None]:
np.array([0,0]) + (1,1)