In [1]:
from stockfish import Stockfish
import chess

stockfish_path = "/Users/benitorusconi/Documents/CDS/05_HS23/Reinforcement Learning (cds-117)/engine/stockfish"
stockfish = Stockfish(path=stockfish_path)

def print_board(board):
    print(board)

def play_game():
    board = chess.Board()

    print("Chess game against Stockfish\n")

    while not board.is_game_over():
        print_board(board)

        # Player's move
        player_move = input("Your move (in algebraic notation): ")
        if chess.Move.from_uci(player_move) in board.legal_moves:
            board.push_uci(player_move)
        else:
            print("Invalid move. Try again.")
            continue

        if board.is_game_over():
            break

        # Stockfish's move
        stockfish.set_fen_position(board.fen())
        stockfish_move = stockfish.get_best_move()
        print("Stockfish's move:", stockfish_move)
        board.push_uci(stockfish_move)

    print("\nGame Over")
    print("Result:", board.result())

# Play the game
play_game()


Chess game against Stockfish

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


InvalidMoveError: expected uci string to be of length 4 or 5: ''

In [14]:
from stockfish import Stockfish
import chess
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard

stockfish_path = "/Users/benitorusconi/Documents/CDS/05_HS23/Reinforcement Learning (cds-117)/engine/stockfish"
stockfish = Stockfish(path=stockfish_path)

# Define the Q-learning parameters
learning_rate = 0.1
discount_factor = 0.9
exploration_prob = 0.2

# Initialize the Q-table
state_space_size = 64  # Assuming a simplified state space
action_space_size = 4096  # Number of possible chess moves
q_table = np.zeros((state_space_size, action_space_size))

def state_to_index(board):
    # Convert the board state to a unique index for the Q-table
    return hash(board.fen()) % state_space_size

def choose_action(board):
    # Epsilon-greedy policy for action selection
    if np.random.rand() < exploration_prob:
        return np.random.choice(list(board.legal_moves))
    else:
        state_index = state_to_index(board)
        legal_moves_list = list(board.legal_moves)
        if not legal_moves_list:
            return chess.Move.null()  # No legal moves, return a null move
        best_move_index = np.argmax(q_table[state_index])
        best_move_uci = legal_moves_list[min(best_move_index, len(legal_moves_list)-1)].uci()
        return chess.Move.from_uci(best_move_uci)


def update_q_table(state, action, reward, next_state):
    # Q-learning update rule
    state_index = state_to_index(state)
    next_state_index = state_to_index(next_state)
    
    # Convert chess.Move to index
    action_index = list(state.legal_moves).index(action)
    
    best_next_action = np.argmax(q_table[next_state_index])
    
    q_table[state_index, action_index] += learning_rate * (
        reward + discount_factor * q_table[next_state_index, best_next_action] - q_table[state_index, action_index]
    )

def print_board(board):
    print(board)

def play_game():
    board = chess.Board()

    while not board.is_game_over():
        # Reset the Q-table at the beginning of each game
        q_table = np.zeros((state_space_size, action_space_size))

        while not board.is_game_over():
            print_board(board)

            # Player's move (replaced by the RL model's move)
            rl_move = choose_action(board)
            if rl_move in board.legal_moves:
                state = board.copy()
                board.push(rl_move)
            else:
                print("Invalid move. Try again.")
                continue

            if board.is_game_over():
                break

            # Stockfish's move
            stockfish.set_fen_position(board.fen())
            stockfish_move = stockfish.get_best_move()
            print("Stockfish's move:", stockfish_move)
            next_state = board.copy()
            board.push_uci(stockfish_move)

            # Reward calculation (you may define your own reward function)
            reward = 0 if board.result() == "*" else 1 if board.result() == "1-0" else -1

            # Update the Q-table
            update_q_table(state, rl_move, reward, next_state)

        print("\nGame Over")
        print("Result:", board.result())

# Play multiple games



In [16]:
log_dir = "logs/"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Create a model (you may need a more complex model based on your problem)
model = Sequential([
    Dense(64, input_shape=(state_space_size,), activation='relu'),
    Dense(action_space_size, activation='linear')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Play multiple games
num_games = 10  # You can adjust the number of games
for episode in range(num_games):
    total_reward = 0
    steps = 0
    play_game()

    # Log metrics to TensorBoard after each episode
    with tf.summary.create_file_writer(log_dir).as_default():
        tf.summary.scalar('Total Reward', total_reward, step=episode)
        tf.summary.scalar('Steps', steps, step=episode)

# Optionally, save the model after training
#model.save("trained_model.h5")




r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R
Stockfish's move: d7d5
r n b q k b n r
p p p . p p p p
. . . . . . . .
. . . p . . . .
. . . . . . P .
. . . . . . . .
P P P P P P . P
R N B Q K B N R
Stockfish's move: c8g4
r n . q k b n r
p p p . p p p p
. . . . . . . .
. . . p . . . .
. . . . . . b .
. . . . . . . N
P P P P P P . P
R N B Q K B . R
Stockfish's move: e7e6
r n . q k b n r
p p p . . p p p
. . . . p . . .
. . . p . . N .
. . . . . . b .
. . . . . . . .
P P P P P P . P
R N B Q K B . R
Stockfish's move: g4d1
r n . q k b n r
p p p . . p p p
. . . . p . . .
. . . p . . N .
. . . . P . . .
. . . . . . . .
P P P P . P . P
R N B b K B . R
Stockfish's move: d1f3
r n . q k b n r
p p p . . p p N
. . . . p . . .
. . . p . . . .
. . . . P . . .
. . . . . b . .
P P P P . P . P
R N B . K B . R
Stockfish's move: f3h1
r n . q k N n r
p p p . . p p .
. . . . p . . .
. . . p . . . .
. . . . P . . .
. . . . . . . 

  saving_api.save_model(
