In [26]:
from stockfish import Stockfish
import chess

# Location of the Stockfish binary. Set the STOCKFISH_PATH environment variable to
# run the notebook on another machine; without it the original local install path
# is used, so existing behaviour is unchanged.
import os

stockfish_path = os.environ.get(
    "STOCKFISH_PATH",
    "/Users/benitorusconi/Documents/CDS/05_HS23/Reinforcement Learning (cds-117)/engine/stockfish",
)
# Engine handle used by play_game() to ask Stockfish for its reply.
stockfish = Stockfish(path=stockfish_path)

def print_board(board):
    """Write the text rendering of `board` to standard output, followed by a newline."""
    rendered = str(board)
    print(rendered)

def play_game():
    """Play an interactive console game: the human moves first, Stockfish replies.

    Moves are read with input() and must be in UCI form (from-square followed by
    to-square, e.g. "e2e4", plus a promotion letter such as "e7e8q"). Malformed or
    illegal input is rejected with a message and the player is asked again.
    The board is printed before every player move and the result at the end.
    """
    board = chess.Board()

    print("Chess game against Stockfish\n")

    while not board.is_game_over():
        print_board(board)

        # Player's move. The parser below is UCI, so the prompt says so.
        player_move = input("Your move (in UCI notation, e.g. e2e4): ").strip()
        try:
            move = chess.Move.from_uci(player_move)
        except ValueError:
            # from_uci raises on text that is not a UCI move; treat it like an
            # illegal move instead of crashing the game.
            move = None

        if move is None or move not in board.legal_moves:
            print("Invalid move. Try again.")
            continue
        board.push(move)

        if board.is_game_over():
            break

        # Stockfish's move
        stockfish.set_fen_position(board.fen())
        stockfish_move = stockfish.get_best_move()
        print("Stockfish's move:", stockfish_move)
        board.push_uci(stockfish_move)

    print("\nGame Over")
    print("Result:", board.result())

# Play the game
#play_game()


In [27]:
from stockfish import Stockfish
import chess
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.compat.v1.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
from IPython.display import display, HTML
import chess.svg

# Location of the Stockfish binary. Set the STOCKFISH_PATH environment variable to
# run the notebook on another machine; without it the original local install path
# is used, so existing behaviour is unchanged.
import os

stockfish_path = os.environ.get(
    "STOCKFISH_PATH",
    "/Users/benitorusconi/Documents/CDS/05_HS23/Reinforcement Learning (cds-117)/engine/stockfish",
)
# Engine handle used by play_game() to generate the opponent's replies.
stockfish = Stockfish(path=stockfish_path)

# --- Q-learning hyperparameters ---------------------------------------------
learning_rate = 0.001    # step size of the tabular Q update; also passed to the Adam optimizer
discount_factor = 0.9    # weight of the next-state value in the Q update
exploration_prob = 0.4   # probability that choose_action() plays a random legal move

# Seed NumPy's global RNG so the random exploration moves are repeatable when the
# cell is re-run. (Only NumPy is seeded here.)
random_seed = 42
np.random.seed(random_seed)

# --- Q-table ------------------------------------------------------------------
state_space_size = 64      # number of rows; state_to_index() folds a position into this range
action_space_size = 1000   # number of columns; column i is the i-th legal move of a position
q_table = np.zeros((state_space_size, action_space_size))

# Small fully connected network: state_space_size inputs -> 64 -> 128 ->
# action_space_size linear outputs, assembled one layer at a time.
# In the code visible here it is only compiled and has its weights logged as
# histograms by the episode loop; move selection and learning use q_table.
model = Sequential()
model.add(Dense(64, input_shape=(state_space_size,), activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(action_space_size, activation='linear'))

# Mean-squared-error loss, Adam with the shared learning_rate.
model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))

def state_to_index(board):
    """Map a board position to a row index of q_table.

    The position's FEN string is checksummed and reduced modulo
    state_space_size, so many different positions share a row.

    A CRC32 checksum is used instead of the built-in hash(): str hashes are
    salted per interpreter process, which would give the same position a
    different row after every kernel restart.

    Returns:
        int in the range [0, state_space_size).
    """
    import zlib  # local import keeps this function self-contained within the cell

    fen_bytes = board.fen().encode("utf-8")
    return zlib.crc32(fen_bytes) % state_space_size

def choose_action(board):
    """Pick the agent's move for `board` with an epsilon-greedy rule.

    With probability exploration_prob a uniformly random legal move is returned;
    otherwise the legal move with the highest Q-value in this position's row.
    Column i of the row corresponds to the i-th entry of list(board.legal_moves),
    the same convention update_q_table() uses when writing.

    Returns:
        chess.Move: a legal move, or the null move when the position has no
        legal moves.
    """
    legal_moves_list = list(board.legal_moves)
    # Guard first so that neither branch has to deal with an empty move list.
    if not legal_moves_list:
        return chess.Move.null()

    if np.random.rand() < exploration_prob:
        # Index into the list rather than handing Move objects to NumPy.
        return legal_moves_list[np.random.randint(len(legal_moves_list))]

    state_index = state_to_index(board)
    # Only the first len(legal_moves_list) columns are actions in this position.
    # Looking at the whole row would let entries written for another position
    # that shares the row decide the move.
    legal_q_values = q_table[state_index, :len(legal_moves_list)]
    best_move_index = int(np.argmax(legal_q_values))
    return legal_moves_list[best_move_index]

def update_q_table(state, action, reward, next_state):
    """Apply one Q-learning update to q_table for the transition (state, action).

    Args:
        state: board before the agent's move.
        action: the move played; must be one of state.legal_moves.
        reward: scalar reward for the transition.
        next_state: board reached afterwards.

    Raises:
        ValueError: if `action` is not a legal move in `state`.
    """
    state_index = state_to_index(state)
    # Column of an action is its position in the legal-move list.
    action_index = list(state.legal_moves).index(action)

    if next_state.is_game_over():
        # Nothing follows a finished game, so there is no future value to add.
        best_next_value = 0.0
    else:
        next_state_index = state_to_index(next_state)
        # Maximise only over columns that are real actions in next_state.
        num_next_actions = len(list(next_state.legal_moves))
        best_next_value = np.max(q_table[next_state_index, :num_next_actions])

    current_value = q_table[state_index, action_index]
    td_target = reward + discount_factor * best_next_value
    q_table[state_index, action_index] += learning_rate * (td_target - current_value)

def display_chess_board(board):
    """Show `board` in the notebook as a 400-pixel SVG diagram.

    Returns whatever IPython's display() returns.
    """
    svg_markup = chess.svg.board(board=board, size=400)
    board_html = HTML(svg_markup)
    return display(board_html)

# Reward attached to a finished game, keyed by board.result(). The agent moves
# first from the starting position, so "1-0" is a win for the agent.
_RESULT_REWARDS = {
    "1-0": 1000,      # Win
    "0-1": -1000,     # Loss
    "1/2-1/2": 100,   # Draw
}


def _captured_piece_value(board, move):
    """Value of the piece `move` captures on `board`; 0 for a non-capture.

    Must be called BEFORE `move` is pushed: once the move is on the board the
    destination square holds the moving piece, not the captured one.
    """
    if not board.is_capture(move):
        return 0
    if board.is_en_passant(move):
        # The captured pawn does not stand on the destination square.
        return piece_value(chess.Piece(chess.PAWN, not board.turn))
    return piece_value(board.piece_at(move.to_square))


def play_game():
    """Play one training game: the Q-learning agent moves first, Stockfish replies.

    After every agent move (and Stockfish's reply, if the game continues) the
    Q-table is updated once. The reward for that step is:
      * 0.5 if the agent's move gave check, replaced by the game-result reward
        (win 1000, loss -1000, draw 100) when the game has ended,
      * plus the value of the piece the agent captured,
      * minus 1.01 times the value of the piece Stockfish captured in reply.

    Returns:
        (game_states, result): the positions before each agent move followed by
        the final position, and the result string from board.result().
    """
    board = chess.Board()
    game_states = []

    while not board.is_game_over():
        state = board.copy()
        game_states.append(state)

        rl_move = choose_action(board)
        if rl_move not in board.legal_moves:
            print("Invalid move. Try again.")
            game_states.pop()  # do not record the same position twice
            continue

        # Capture values are read before each push, while the captured piece is
        # still on the board.
        capture_balance = _captured_piece_value(board, rl_move)
        board.push(rl_move)
        reward = 0.5 if board.is_check() else 0

        if board.is_game_over():
            # The agent's own move ended the game (e.g. checkmate). Learn from
            # it before leaving the loop, otherwise a win is never rewarded.
            reward = _RESULT_REWARDS.get(board.result(), reward) + capture_balance
            update_q_table(state, rl_move, reward, board.copy())
            break

        stockfish.set_fen_position(board.fen())
        stockfish_move_uci = stockfish.get_best_move()
        stockfish_move = chess.Move.from_uci(stockfish_move_uci)
        capture_balance -= _captured_piece_value(board, stockfish_move) * 1.01
        board.push(stockfish_move)

        # board.result() is "*" while the game is still running, which is not a
        # key of _RESULT_REWARDS, so the check bonus is kept in that case.
        reward = _RESULT_REWARDS.get(board.result(), reward) + capture_balance

        # The next state is the position in which the agent moves again, i.e.
        # after Stockfish's reply.
        next_state = board.copy()
        update_q_table(state, rl_move, reward, next_state)

    game_states.append(board.copy())
    return game_states, board.result()

def piece_value(piece):
    """Return the material score used in the reward for `piece`.

    An empty square (None) scores 0. Pawn 1, knight 3, bishop 3, rook 5,
    queen 9, king 10. Any other piece type yields None.
    """
    if piece is None:
        return 0

    # Lookup table keyed by python-chess piece-type constants.
    values_by_type = {
        chess.PAWN: 1,
        chess.KNIGHT: 3,
        chess.BISHOP: 3,
        chess.ROOK: 5,
        chess.QUEEN: 9,
        chess.KING: 10,
    }
    return values_by_type.get(piece.piece_type)


log_dir = "logs/"
# Keras TensorBoard callback; nothing in the code visible here passes it to a fit() call.
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

num_games = 100

# One writer for the whole run. Creating a writer inside the loop opens a new
# event file for every episode.
summary_writer = tf.summary.create_file_writer(log_dir)

for episode in range(num_games):
    print("Game:", episode)
    game_states, result = play_game()

    # play_game() records one position per agent turn plus the final position.
    steps = len(game_states) - 1
    # NOTE(review): play_game() does not return the rewards it hands out, so this
    # stays a placeholder and the 'Total Reward' curve is flat at 0.
    total_reward = 0

    with summary_writer.as_default():
        tf.summary.scalar('Total Reward', total_reward, step=episode)
        tf.summary.scalar('Steps', steps, step=episode)

        for layer in model.layers:
            for weight in layer.weights:
                tf.summary.histogram(weight.name, weight, step=episode)

        # Flush after the histograms so everything from this episode is written out.
        tf.summary.flush()

    # Placeholder for a per-game sum of absolute deviations; never updated or printed.
    sum_deviation = 0.0

    if episode == num_games - 1:
        # Display the last game
        for state in game_states:
            display_chess_board(state)


Game: 0
Game: 1
Game: 2
Game: 3
Game: 4
Game: 5
Game: 6
Game: 7
Game: 8
Game: 9
Game: 10
Game: 11
Game: 12
Game: 13
Game: 14
Game: 15
Game: 16
Game: 17
Game: 18
Game: 19
Game: 20
Game: 21
Game: 22
Game: 23
Game: 24
Game: 25
Game: 26
Game: 27
Game: 28
Game: 29
Game: 30
Game: 31
Game: 32
Game: 33
Game: 34
Game: 35
Game: 36
Game: 37
Game: 38
Game: 39
Game: 40
Game: 41
Game: 42
Game: 43
Game: 44
Game: 45
Game: 46
Game: 47
Game: 48
Game: 49
Game: 50
Game: 51
Game: 52
Game: 53
Game: 54
Game: 55
Game: 56
Game: 57
Game: 58
Game: 59
Game: 60
Game: 61
Game: 62
Game: 63
Game: 64
Game: 65
Game: 66
