# Reinforcement Learning mit Python - Racing Kings Schach KI
## Aufgabenstellung

Erstellen Sie Modelle, welche das Spiel Racing Kings erlernen. Am Ende sollte ein Spiel gegen den erstellten Algorithmus möglich sein. Testen Sie als Gegner einen Algorithmus, welcher Zufallszüge ausführt, und bewerten Sie Ihren Algorithmus. Nutzen Sie die chess-Bibliothek (https://python-chess.readthedocs.io/en/latest/).

Bonus: Vergleichen Sie einen modellfreien und einen modellbasierten Ansatz.

Diese Anleitung wurde verwendet, um zügig mit der Verwendung der chess-Bibliothek starten zu können:
https://jupyter.brynmawr.edu/services/public/dblank/CS371%20Cognitive%20Science/2016-Fall/Programming%20a%20Chess%20Player.ipynb

# Install Libraries (MacOS)

In [None]:
# Install the packages used by the cells below (numpy, chess, gym, stable_baselines3).
# NOTE(review): pydot is not imported anywhere in the visible cells — confirm it is still needed.
!pip3 install numpy
!pip3 install chess
!pip3 install pydot
!pip3 install gym
!pip3 install stable_baselines3

# Define chess environment

In [17]:
def display_board(board, use_svg):
    """Return markup for showing *board* inside an HTML display.

    With a truthy ``use_svg`` the board's own ``_repr_svg_()`` output is
    returned; otherwise the plain-text form of the board is wrapped in a
    ``<pre>`` element.
    """
    if not use_svg:
        # Text fallback: keep the ASCII layout aligned via <pre>.
        return "".join(("<pre>", str(board), "</pre>"))
    return board._repr_svg_()

In [18]:
def who(player):
    """Return ``"White"`` if *player* equals ``chess.WHITE``, otherwise ``"Black"``."""
    if player == chess.WHITE:
        return "White"
    return "Black"

In [19]:
import chess.variant
import random
import time
from IPython.display import display, HTML, clear_output
import numpy as np
import gym
from gym.spaces import Discrete, Box, Dict

# Size of the flat action space: 64 origin squares x 64 destination squares
# (an action index is decoded below as index // 64 -> from, index % 64 -> to).
N_DISCRETE_ACTIONS = 4096

# from Learning_Chess pdf
class RacingKingsEnvironment(gym.Env):
    """Gym environment in which an agent plays the Racing Kings chess variant.

    Actions are integers in ``[0, N_DISCRETE_ACTIONS)`` that encode a move as
    ``from_square * 64 + to_square``.  Observations are 14 binary 8x8 planes
    (see :attr:`states`).  Rewards are expressed from Black's point of view:
    in training mode (``isGame=False``) White is played with random moves and
    the agent's action is applied as Black's reply.
    """

    def __init__(self):
        super().__init__()
        self.board = chess.variant.RacingKingsBoard()
        # Accumulated reward since the last reset().
        self.reward = 0
        self.action_space = Discrete(N_DISCRETE_ACTIONS)
        self.observation_space = Box(low=0, high=1, shape=(14, 8, 8), dtype=np.uint8)

    def board_square_to_index(self, name):
        """Convert a square name such as ``"e2"`` to its 0..63 index (a1 = 0, h8 = 63).

        Only the first two characters of *name* are read.
        """
        return (int(name[1]) - 1) * 8 + (ord(name[0]) - 97)

    def action_index_to_uci(self, index):
        """Decode a flat action index into a four-character UCI move string."""
        index_from, index_to = index // 64, index % 64
        return (
            chr(index_from % 8 + 97) + str(index_from // 8 + 1)
            + chr(index_to % 8 + 97) + str(index_to // 8 + 1)
        )

    def action_uci_to_index(self, uci):
        """Encode a UCI move string (``"e2e4"``) as a flat action index."""
        return (
            self.board_square_to_index(uci[0:2]) * 64
            + self.board_square_to_index(uci[2:4])
        )

    @property
    def actions(self):
        """Binary mask of length ``N_DISCRETE_ACTIONS``; 1 marks a currently legal move."""
        mask = np.zeros(N_DISCRETE_ACTIONS, dtype=np.int8)
        for move in self.board.legal_moves:
            mask[move.from_square * 64 + move.to_square] = 1
        return mask

    @property
    def states(self):
        """Encode the current position as a ``(14, 8, 8)`` array of 0/1 values.

        Planes 0-5 hold the white pieces and planes 6-11 the black pieces, one
        plane per piece type.  Plane 12 / 13 mark the origin squares of the
        moves White / Black could legally play if it were their turn.  Row 0
        of every plane corresponds to rank 8.

        The array uses ``uint8`` so that it matches ``observation_space``.
        """
        planes = np.zeros((14, 8, 8), dtype=np.uint8)

        for piece_type in chess.PIECE_TYPES:
            # White occupies planes piece_type - 1, Black planes piece_type + 5.
            for colour, offset in ((chess.WHITE, -1), (chess.BLACK, 5)):
                for square in self.board.pieces(piece_type, colour):
                    planes[piece_type + offset][7 - square // 8][square % 8] = 1

        # The legal-move planes do not depend on the piece type, so they are
        # computed once here rather than once per piece type.
        # NOTE(review): the planes mark the squares moves start FROM (the
        # first two UCI characters in the original code).  If destination
        # squares were intended, switch to move.to_square — but that changes
        # the observation and invalidates already trained models.
        original_turn = self.board.turn
        try:
            for plane, colour in ((12, chess.WHITE), (13, chess.BLACK)):
                self.board.turn = colour
                for move in self.board.legal_moves:
                    square = move.from_square
                    planes[plane][7 - square // 8][square % 8] = 1
        finally:
            # Always hand the move back to the side that really is to play.
            self.board.turn = original_turn
        return planes

    def step(self, action, isGame=False):
        """Apply *action* to the board and return ``(observation, reward, done, info)``.

        Args:
            action: Flat action index (see :meth:`action_index_to_uci`), or
                ``None`` to skip the agent's move (used by :meth:`reset`).
            isGame: When ``False`` (training), a random move is first played
                for White if it is White's turn.  When ``True`` only *action*
                is applied, whoever is to move.

        Rewards: +100 if Black's move wins, -100 if White's move wins, 0 for a
        draw (both kings on the eighth rank), -10 if the board was already
        finished before *action* could be applied.  An illegal or undecodable
        action ends the episode without a reward.
        """
        done = False
        step_reward = 0
        info = {}

        # Training mode: the random opponent (White) moves before the agent.
        if not isGame and self.board.turn == chess.WHITE:
            try:
                self.board.push(random.choice(list(self.board.legal_moves)))
                info = {"msg": "White Did a valid move"}
            except IndexError:
                # random.choice() on an empty list: White has no legal move.
                info = {"msg": "Passed an already finished board"}
                done = True

        if self.board.is_game_over(claim_draw=True):
            step_reward -= 10
            done = True
            info = {"msg": "game over"}
        elif action is not None:
            try:
                self.board.push_uci(self.action_index_to_uci(action))
                info = {"msg": "Did a valid move"}
            except (ValueError, TypeError):
                # push_uci() rejects malformed and illegal moves with a
                # ValueError; TypeError covers actions that cannot be decoded.
                info = {"msg": "Action is not a valid move"}
                done = True

            if self.board.is_variant_end():
                done = True
                if self.board.is_variant_draw():
                    # Both kings reached the eighth rank: nobody won.
                    info = {"msg": "Draw: both kings reached the eighth rank"}
                elif self.board.turn == chess.WHITE:
                    # White is to move, so Black made the deciding move.
                    step_reward += 100
                    info = {"msg": "AI won the game!"}
                else:
                    step_reward -= 100
                    info = {"msg": "Opponent won the game!"}

        self.reward += step_reward
        return self.states, step_reward, done, info

    def reset(self):
        """Start a new episode from a randomised position and return its observation.

        The board is reset, an even number (0..60) of random moves is played,
        and finally ``step(None)`` lets the random opponent move if it is
        White's turn.  Only the observation is returned.
        """
        self.board.reset()
        for _ in range(random.randint(0, 30) * 2):
            try:
                self.board.push(random.choice(list(self.board.legal_moves)))
            except IndexError:
                # No legal move left during warm-up: start over from scratch.
                self.board.reset()

        self.reward = 0.0
        return self.step(None)[0]  # reward, done, info can't be included

    def render(self, mode="human", pause=0.2):
        """Display the board in the notebook.

        Args:
            mode: ``"human"`` shows an SVG board, clears the previous output
                and sleeps for *pause* seconds; any other non-``None`` value
                shows a plain-text board; ``None`` displays nothing.
            pause: Delay in seconds after drawing in ``"human"`` mode.
        """
        if mode is None:
            return
        human = mode == "human"
        board_markup = display_board(self.board, human)
        html = "<b>Move %s %s:</b><br/>%s" % (
            len(self.board.move_stack), who(self.board.turn), board_markup)
        if human:
            clear_output(wait=True)
        display(HTML(html))
        if human:
            time.sleep(pause)

    def close(self):
        """Announce that the environment is being closed (there is nothing to release)."""
        print("closing")

# Test Functionality of Chess environment randomly

In [None]:
# How many random test games to play
episodes = 10
# Instantiate the Racing Kings environment defined above
env = RacingKingsEnvironment()
for episode in range(1, episodes + 1):
    # Every game starts from a freshly reset environment
    state = env.reset()
    score = 0
    done = False
    # Play until the environment reports the episode as finished
    while True:
        # mode=None keeps rendering switched off for speed
        env.render(mode=None)
        # Draw an arbitrary action index from the action space and apply it
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
        if done:
            break
    # Summary of the finished game
    print('Episode:{} Score:{} Info:{}'.format(episode, score, info))
# Shut the environment down
env.close()

# Training and saving Model using stable baselines

In [None]:
# Import necessary dependencies
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy 
from stable_baselines3.common.callbacks import EvalCallback 
import os

In [None]:
# Directory for the training logs (passed to PPO as tensorboard_log below)
logging_path = os.path.join('Training', 'Logs') 

In [None]:
# Vectorize the environment. DummyVecEnv expects a list of factories and calls
# each one to build its environment, so the class itself is passed as the
# factory. The previous `lambda: env` closed over a name that was rebound to
# the vectorized wrapper on the same line, which only worked because the
# lambda happened to be called before the rebinding took effect.
env = DummyVecEnv([RacingKingsEnvironment])
# Create model using PPO algorithm and MLP Policy, verbose=1 for Info return
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=logging_path)

In [None]:
# Directory in which the evaluation callback stores the best model it has seen
best_save_path = os.path.join('Training', 'SavedModels', 'Best_Racing_Kings')
# Evaluation callback: eval_freq=20000 sets how often the agent is evaluated,
# and the best-scoring model so far is written to best_model_save_path.
# NOTE(review): the training env is reused as the evaluation env here — confirm this is intended.
eval_callback = EvalCallback(env, eval_freq=20000, best_model_save_path = best_save_path, verbose=1)

In [None]:
# Train the model for 100000 timesteps, passing the evaluation callback
# (`eval_callback`) so it runs during training
model.learn(total_timesteps=100000, callback=eval_callback)

Saving the model

In [None]:
# Path under which the trained model is saved and later loaded again
Model_Path = os.path.join('Training', 'SavedModels', 'PPO_Racing_Kings_Alex_neu')

In [None]:
# Write the current model to Model_Path
model.save(Model_Path)

In [None]:
# Remove the in-memory model; the next cell loads it again from Model_Path
del model

In [None]:
# Load the model from Model_Path and attach it to the current env
# (this cell can also be used on its own to load an already trained model)
model = PPO.load(Model_Path, env=env)

# Training Evaluation

In [None]:
# Evaluate the trained model with Stable Baselines' evaluate_policy:
# 10 evaluation episodes on `env`, with rendering switched on
evaluate_policy(model, env, n_eval_episodes=10, render=True)

# Play Game Human vs AI

In [None]:
# Create environment from class RacingKings
env = RacingKingsEnvironment()
# Define necessary variables
done = False
score = 0
info = {}
# Reset chess board to ensure fresh start
env.board.reset()
# Start from a real observation so the model never receives None
obs = env.states
# Start game loop
while not done:
    # Activate render mode
    env.render()
    # Check if turn is white or black (Human plays white)
    if who(env.board.turn) == 'White':
        legal_moves = [move.uci() for move in env.board.legal_moves]
        if not legal_moves:
            # White has no legal move left: the game is over, nothing to ask for
            info = {"msg": "game over"}
            break
        # Print out formatted list of possible moves at each step as a reminder for human player
        print('Gueltige Züge:')
        print(*legal_moves, sep=", ")
        # Ask for the human move in UCI notation and keep asking until it is
        # legal; a typo would otherwise crash the cell or end the game
        uci = input('Ihr nächster Zug:').strip()
        while uci not in legal_moves:
            print('Ungültiger Zug, bitte erneut eingeben.')
            uci = input('Ihr nächster Zug:').strip()
        action = env.action_uci_to_index(uci)
        # Make step (Parameter 'True' to trigger game functionality in environment)
        obs, reward, done, info = env.step(action, True)
        score += reward
    else:
        # Create AI step using predict function
        action = model.predict(obs)
        # Make step (position 0 because predict function returns tuple)
        obs, reward, done, info = env.step(action[0], True)
        score += reward
# End game loop

# Render last state of chess board after game ends
env.render()
# Print out game info
print('Score:{} Info:{}'.format(score, info))
# Closing environment
env.close()

# Play Game Random vs AI

In [None]:
# Create environment from class RacingKings
env = RacingKingsEnvironment()
# Define necessary variables
done = False
score = 0
info = {}
# Reset chess board to ensure fresh start
env.board.reset()
# Start from a real observation so the model never receives None
obs = env.states
# Start game loop
while not done:
    # Activate render mode (can be commented out for enhanced speed)
    env.render()
    # Check if turn is white or black (Random plays white)
    if who(env.board.turn) == 'White':
        legal_moves = list(env.board.legal_moves)
        if not legal_moves:
            # White has no legal move left; random.choice() would raise
            # IndexError on the empty list, so finish the game instead
            info = {"msg": "game over"}
            break
        # Create random move from list of legal moves
        random_move = random.choice(legal_moves)
        # Convert random move from UCI to Index
        action = env.action_uci_to_index(random_move.uci())
        # Make step (Parameter 'True' to trigger game functionality in environment)
        obs, reward, done, info = env.step(action, True)
        score += reward
    else:
        # Create AI step using predict function
        action = model.predict(obs)
        # Make step (position 0 because predict function returns tuple)
        obs, reward, done, info = env.step(action[0], True)
        score += reward
# End game loop

# Render last state of chess board after game ends
env.render()
# Print out game info
print('Score:{} Info:{}'.format(score, info))
# Closing environment
env.close()

# Play Game Random vs Random

In [None]:
# Code to test environment and enhance game comprehension

# Create environment from class RacingKings
env = RacingKingsEnvironment()
# Define necessary variables
obs = None
done = False
score = 0
info = {}
# Reset chess board to ensure fresh start
env.board.reset()
# Start game loop
while not done:
    # Activate render mode (can be commented out for enhanced speed)
    env.render()
    legal_moves = list(env.board.legal_moves)
    if not legal_moves:
        # The side to move has no legal move left; random.choice() would
        # raise IndexError on the empty list, so finish the game instead
        info = {"msg": "game over"}
        break
    # Create random move from list of legal moves
    random_move = random.choice(legal_moves)
    # Convert random move from UCI to Index
    action = env.action_uci_to_index(random_move.uci())
    # Make step (Parameter 'True' to trigger game functionality in environment)
    obs, reward, done, info = env.step(action, True)
    score += reward
# End game loop

# Render last state of chess board after game ends
env.render()
# Print out game info
print('Score:{} Info:{}'.format(score, info))
# Closing environment
env.close()