In [1]:
from stockfish import Stockfish
import chess

stockfish_path = "/Users/benitorusconi/Documents/CDS/05_HS23/Reinforcement Learning (cds-117)/engine/stockfish"
stockfish = Stockfish(path=stockfish_path)

def print_board(board):
    print(board)

def play_game():
    board = chess.Board()

    print("Chess game against Stockfish\n")

    while not board.is_game_over():
        print_board(board)

        # Player's move
        player_move = input("Your move (in algebraic notation): ")
        if chess.Move.from_uci(player_move) in board.legal_moves:
            board.push_uci(player_move)
        else:
            print("Invalid move. Try again.")
            continue

        if board.is_game_over():
            break

        # Stockfish's move
        stockfish.set_fen_position(board.fen())
        stockfish_move = stockfish.get_best_move()
        print("Stockfish's move:", stockfish_move)
        board.push_uci(stockfish_move)

    print("\nGame Over")
    print("Result:", board.result())

# Play the game
play_game()


Chess game against Stockfish

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


InvalidMoveError: expected uci string to be of length 4 or 5: ''

In [27]:
from stockfish import Stockfish
import chess
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import TensorBoard
#import adam legacy
from tensorflow.compat.v1.keras.optimizers import Adam

stockfish_path = "/Users/benitorusconi/Documents/CDS/05_HS23/Reinforcement Learning (cds-117)/engine/stockfish"
stockfish = Stockfish(path=stockfish_path)

# Define the Q-learning parameters
learning_rate = 0.1
discount_factor = 0.9
exploration_prob = 0.2

# Initialize the Q-table
state_space_size = 64  # Assuming a simplified state space
action_space_size = 4096  # Number of possible chess moves
q_table = np.zeros((state_space_size, action_space_size))

def state_to_index(board):
    # Convert the board state to a unique index for the Q-table
    return hash(board.fen()) % state_space_size

def choose_action(board):
    # Epsilon-greedy policy for action selection
    if np.random.rand() < exploration_prob:
        return np.random.choice(list(board.legal_moves))
    else:
        state_index = state_to_index(board)
        legal_moves_list = list(board.legal_moves)
        if not legal_moves_list:
            return chess.Move.null()  # No legal moves, return a null move
        best_move_index = np.argmax(q_table[state_index])
        best_move_uci = legal_moves_list[min(best_move_index, len(legal_moves_list)-1)].uci()
        return chess.Move.from_uci(best_move_uci)


def update_q_table(state, action, reward, next_state):
    # Q-learning update rule
    state_index = state_to_index(state)
    next_state_index = state_to_index(next_state)
    
    # Convert chess.Move to index
    action_index = list(state.legal_moves).index(action)
    
    best_next_action = np.argmax(q_table[next_state_index])
    
    q_table[state_index, action_index] += learning_rate * (
        reward + discount_factor * q_table[next_state_index, best_next_action] - q_table[state_index, action_index]
    )

def print_board(board):
    print(board)

def play_game():
    board = chess.Board()



    while not board.is_game_over():
        #print_board(board)
        #priint number of moves
        print("Number of moves:", board.fullmove_number)
        # Player's move (replaced by the RL model's move)
        rl_move = choose_action(board)
        if rl_move in board.legal_moves:
            state = board.copy()
            board.push(rl_move)
        else:
            print("Invalid move. Try again.")
            continue

        if board.is_game_over():
            break

        # Stockfish's move
        stockfish.set_fen_position(board.fen())
        stockfish_move_uci = stockfish.get_best_move()
        print("Stockfish's move:", stockfish_move_uci)
        stockfish_move = chess.Move.from_uci(stockfish_move_uci)
        next_state = board.copy()
        board.push(stockfish_move)

    # Reward calculation (you may define your own reward function)
        reward = 0

        if board.result() == "1-0":
            # Win
            reward = float('inf')  # Infinite high reward
        elif board.result() == "0-1":
            # Loss
            reward = -float('inf')  # Infinite low reward
        elif board.result() == "1/2-1/2":
            # Draw
            reward = 0.1  # Small positive reward for a draw
        elif board.is_capture(rl_move):
            # Opponent's piece captured
            reward = 0.01  # Very small positive reward for a piece capture
        elif board.is_capture(stockfish_move):
            # Your own piece captured
            reward = -0.01  # Very small negative reward for your piece getting captured

        update_q_table(state, rl_move, reward, next_state)




        print("\nGame Over")
        print("Result:", board.result())

# Play multiple games



In [30]:
log_dir = "logs/"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Create a model (you may need a more complex model based on your problem)

# Play multiple games
num_games = 10
for episode in range(num_games):
    total_reward = 0
    steps = 0
    play_game()

    # Log metrics to TensorBoard after each episode
    with tf.summary.create_file_writer(log_dir).as_default():
        tf.summary.scalar('Total Reward', total_reward, step=episode)
        tf.summary.scalar('Steps', steps, step=episode)
        tf.summary.flush() 

        # Optionally, log additional metrics such as the model's weights or gradients
        for layer in model.layers:
            for weight in layer.weights:
                tf.summary.histogram(weight.name, weight, step=episode)

# model.save("trained_model.h5")



#model.save("trained_model.h5")


Number of moves: 1
Stockfish's move: g8f6

Game Over
Result: *
Number of moves: 2
Stockfish's move: d7d5

Game Over
Result: *
Number of moves: 3
Stockfish's move: c7c5

Game Over
Result: *
Number of moves: 4
Stockfish's move: h8h7

Game Over
Result: *
Number of moves: 5
Stockfish's move: c5d4

Game Over
Result: *
Number of moves: 6
Stockfish's move: f6e4

Game Over
Result: *
Number of moves: 7
Stockfish's move: b8c6

Game Over
Result: *
Number of moves: 8
Stockfish's move: d8b6

Game Over
Result: *
Number of moves: 9
Stockfish's move: b6f2

Game Over
Result: 0-1
Number of moves: 1
Stockfish's move: d7d5

Game Over
Result: *
Number of moves: 2
Stockfish's move: e7e5

Game Over
Result: *
Number of moves: 3
Stockfish's move: b8c6

Game Over
Result: *
Number of moves: 4
Stockfish's move: d5e4

Game Over
Result: *
Number of moves: 5
Stockfish's move: e5f4

Game Over
Result: *
Number of moves: 6
Stockfish's move: b7a6

Game Over
Result: *
Number of moves: 7
Stockfish's move: h7h5

Game Over
