# Reinforcement Learning Final Project 

Group members:
1. `Inês Rocha: 20220052`
2. `Isabel Dias: 20191215`
3. `Joana Sousa: 20191205`
4. `Rafael Dinis: 20221643`

In [1]:
import chess
import gym
import gym_chess
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import copy
from typing import Optional
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
import cv2
import time
import chess.pgn
import io

from IPython.display import clear_output, display


For this model, we keep only the current board (the first 14 planes of the observation) and the last 7 information planes. The resulting state is then reshaped so that it can be fed to the models.

In [2]:
def preprocess_state(state):
    """Reduce an observation to the planes used by the models.

    Keeps the first 14 planes and the last 7 planes along the third axis,
    joins them into an (8, 8, 21) array and adds a leading batch axis.

    Args:
        state: observation array indexed as [row, column, plane].

    Returns:
        Array of shape (1, 8, 8, 21).
    """
    board_planes = state[:, :, :14]  # current board
    info_planes = state[:, :, -7:]  # trailing information matrices
    kept = np.concatenate((board_planes, info_planes), axis=-1)
    return np.expand_dims(kept.reshape(8, 8, 21), axis=0)

In [3]:
# Locations of the trained models, one per side.
WHITE_MODEL_PATH = './models/main_chess_white_pretrained.keras'
BLACK_MODEL_PATH = './models/main_chess_black_pretrained.keras'

# Load both models once; the player policies read these module-level names.
white_model = load_model(WHITE_MODEL_PATH)
black_model = load_model(BLACK_MODEL_PATH)

In [4]:
def WHITE_PLAYER_POLICY(env, state):
    """Pick the legal action that `white_model` scores highest.

    Args:
        env: environment exposing `legal_actions`.
        state: batched, preprocessed state passed to `white_model.predict`.

    Returns:
        The entry of `env.legal_actions` with the largest predicted value.
    """
    scores = white_model.predict(state, verbose=0)[0]
    candidates = env.legal_actions
    # Only legal actions compete; argmax indexes into the legal subset.
    best_index = np.argmax(scores[candidates])
    return candidates[best_index]


def BLACK_PLAYER_POLICY(env, state):
    """Pick the legal action that `black_model` scores highest.

    Args:
        env: environment exposing `legal_actions`.
        state: batched, preprocessed state passed to `black_model.predict`.

    Returns:
        The entry of `env.legal_actions` with the largest predicted value.
    """
    scores = black_model.predict(state, verbose=0)[0]
    candidates = env.legal_actions
    # Only legal actions compete; argmax indexes into the legal subset.
    best_index = np.argmax(scores[candidates])
    return candidates[best_index]

In [6]:
def generate_WHITE_scenario(AGENT_POLICY):
    """Play one interactive game where the agent is White and a human is Black.

    The agent moves on even step numbers; on odd step numbers the human is
    prompted for a move in UCI format until a move is accepted.

    Args:
        AGENT_POLICY: callable `(env, state) -> action` used for White's moves.

    Returns:
        Tuple `(reward, n_moves)`: the reward returned by the last
        `env.step` call and `np.ceil(counter / 2)`, where `counter` is the
        number of steps played.
    """
    env = gym.make(
        "ChessAlphaZero-v0"
    )  # We use AlphaZero's nomenclature for the action encodings

    try:
        state = preprocess_state(env.reset())
        done = False
        counter = 0  # Each step is one play, so this counts the plays of the game

        while not done:
            if counter % 2 == 0:  # Even step number: WHITE (agent) to play
                # sep="" keeps the first board row aligned with the others.
                print("White's turn to play!\n", env.render(mode='unicode'), sep="")
                action = AGENT_POLICY(env, state)
                next_state, reward, done, info = env.step(action)
            else:  # Odd step number: BLACK (human) to play
                print("Your turn to play!\n", env.render(mode='unicode'), sep="")
                while True:
                    decoded_action = input("Enter your move in format a1b1 please: ")
                    try:
                        action = env.encode(chess.Move.from_uci(decoded_action))
                        next_state, reward, done, info = env.step(action)
                    except (ValueError, AttributeError):
                        print("Invalid move, try again with format 'a1b1'.")
                    else:
                        break

            # Carry the new position forward: without this the agent would
            # keep choosing its moves from the initial position's state.
            state = preprocess_state(next_state)
            counter += 1
    finally:
        # Release the environment even if the game is interrupted.
        env.close()

    return reward, np.ceil(counter / 2)


def generate_BLACK_scenario(AGENT_POLICY):
    """Play one interactive game where the agent is Black and a human is White.

    The human is prompted for a move in UCI format on even step numbers
    (until a move is accepted); the agent moves on odd step numbers.

    Args:
        AGENT_POLICY: callable `(env, state) -> action` used for Black's moves.

    Returns:
        Tuple `(reward, n_moves)`: the reward returned by the last
        `env.step` call and `np.ceil(counter / 2)`, where `counter` is the
        number of steps played.
    """
    env = gym.make(
        "ChessAlphaZero-v0"
    )  # We use AlphaZero's nomenclature for the action encodings

    try:
        state = preprocess_state(env.reset())
        done = False
        counter = 0  # Each step is one play, so this counts the plays of the game

        while not done:
            if counter % 2 == 1:  # Odd step number: BLACK (agent) to play
                # sep="" keeps the first board row aligned with the others.
                print("Black's turn to play!\n", env.render(mode='unicode'), sep="")
                action = AGENT_POLICY(env, state)
                next_state, reward, done, info = env.step(action)
            else:  # Even step number: WHITE (human) to play
                print("Your turn to play!\n", env.render(mode='unicode'), sep="")
                while True:
                    decoded_action = input("Enter your move in format a1b1 please: ")
                    try:
                        action = env.encode(chess.Move.from_uci(decoded_action))
                        next_state, reward, done, info = env.step(action)
                    except (ValueError, AttributeError):
                        print("Invalid move, try again with format 'a1b1'.")
                    else:
                        break

            # Carry the new position forward: without this the agent would
            # keep choosing its moves from the initial position's state.
            state = preprocess_state(next_state)
            counter += 1
    finally:
        # Release the environment even if the game is interrupted.
        env.close()

    return reward, np.ceil(counter / 2)

In [2]:
def CHESS_GAME(AGENT_POLICY_WHITE, AGENT_POLICY_BLACK):
    """Run one interactive game between a human and the agent.

    The human is randomly assigned a colour and the agent plays the other
    one. The outcome is announced to the human and also returned.

    Args:
        AGENT_POLICY_WHITE: policy used when the agent plays White.
        AGENT_POLICY_BLACK: policy used when the agent plays Black.

    Returns:
        A one-element list `[[agent_colour, result, n_steps]]`, where
        `result` is "VICTORY", "DRAW" or "LOSS" from the agent's point of
        view and `n_steps` is the count returned by the scenario function.
    """
    print("Hello! Welcome to my attempt of a chess player algorithm :)\n"
      "Here are the rules:\n"
      "- You will play 1 game where you'll randomly be assigned as white or black player\n"
      "- The white pieces to not have background, and the black pieces have filled/opaque background\n"
      "- When it is your turn, a pop-up window will appear on the top where you write your moves\n"
      "- To insert your move, you need to write in format a1b1, where first two digits is initial position and last two are position you want to move to\n"
      "- The columns are a-h (from left to right), and the rows are 1-8 (1 is bottom, 8 is top)\n"
      "Hope you have fun!")

    player_black = random.choice([True, False])  # choose if player is white or black
    if player_black:
        # The algorithm plays White when the human is Black; a final reward
        # of 1 is treated as the agent's win.
        agent_colour, agent_win_reward = "WHITE", 1
        reward, n_steps = generate_WHITE_scenario(AGENT_POLICY_WHITE)
    else:
        # The algorithm plays Black; a final reward of -1 is its win.
        agent_colour, agent_win_reward = "BLACK", -1
        reward, n_steps = generate_BLACK_scenario(AGENT_POLICY_BLACK)

    if reward == agent_win_reward:
        result = "VICTORY"
        print('YOU LOST! Try again next time :/')
    elif reward == 0:
        result = "DRAW"
        print('DRAW! Guess you are as smart as a machine ;)')
    else:
        result = "LOSS"
        print('YOU WON! CONGRATS!')

    # Previously this record was built and then dropped; hand it back so the
    # caller can inspect or log the outcome.
    return [[agent_colour, result, n_steps]]


In [8]:
# Start one game, wiring each trained policy to the colour it was trained for.
CHESS_GAME(
    AGENT_POLICY_WHITE=WHITE_PLAYER_POLICY,
    AGENT_POLICY_BLACK=BLACK_PLAYER_POLICY,
)

Hello! Welcome to my attempt of a chess player algorithm :)
Here are the rules:
- You will play 1 game where you'll randomly be assigned as white or black player
- The white pieces to not have background, and the black pieces have filled/opaque background
- To insert your move, you need to write in format a1b1, where first two digits is initial position and last two are position you want to move to
- The columns are a-h (from left to right), and the rows are 1-8 (1 is bottom, 8 is top)
Hope you have fun!
Your turn to play!
 ♜ ♞ ♝ ♛ ♚ ♝ ♞ ♜
♟ ♟ ♟ ♟ ♟ ♟ ♟ ♟
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
♙ ♙ ♙ ♙ ♙ ♙ ♙ ♙
♖ ♘ ♗ ♕ ♔ ♗ ♘ ♖
Invalid move, try again with format 'a1b1'.
Invalid move, try again with format 'a1b1'.
Black's turn to play!
 ♜ ♞ ♝ ♛ ♚ ♝ ♞ ♜
♟ ♟ ♟ ♟ ♟ ♟ ♟ ♟
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ♙ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
♙ ♙ ♙ ⭘ ♙ ♙ ♙ ♙
♖ ♘ ♗ ♕ ♔ ♗ ♘ ♖
Your turn to play!
 ♜ ♞ ♝ ♛ ♚ ♝ ⭘ ♜
♟ ♟ ♟ ♟ ♟ ♟ ♟ ♟
⭘ ⭘ ⭘ ⭘ ⭘ ♞ ⭘ ⭘
⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘ ⭘
⭘ ⭘ ⭘ ♙ ⭘ ⭘ ⭘ ⭘
⭘ ⭘