In [211]:
import chess
import chess.engine
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import base64

In [212]:
class Node:
    def __init__(self, state):
        self.state = state
        self.visits = 0
        self.total_score = 0
        self.children = []

In [213]:
# Define the MCTS agent
class MCTSAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

    def get_action(self, state, temperature=1, num_simulations=100):
        state_str = self.convert_state_to_fen(state)
        print("State FEN:", state_str)  # Print the state FEN for debugging
        root_node = Node(state_str)
        temp_board = chess.Board()
        temp_board.set_fen(state_str)
            
        # Selection
        while len(node.children) != 0:
            action, child = max(node.children.items(), key=lambda x: self._uct_value(x[1], node.visit_count))
            temp_board.push(chess.Move.from_uci(action))
            node = child

        # Expansion
        if not temp_board.is_game_over():
            legal_moves = [move.uci() for move in temp_board.legal_moves]
            for move in legal_moves:
                new_state = self._get_state_after_action(state, move)
                if move not in node.children:
                    node.add_child(move, MCTSNode(new_state))
                    break
        # Simulation
        simulation_result = self._simulate(temp_board)

        # Backpropagation
        while node is not None:
            node.visit_count += 1
            node.total_reward += simulation_result
            node = node.parent

        action = max(root_node.children, key=lambda x: root_node.children[x].visit_count)

        return action
    def _simulate(self, board):
        engine = chess.engine.SimpleEngine.popen_uci("C:/Users/sofia/NOVA IMS/1st year/Semester 2/Reinforcement Learning/stockfish_15.1_win_x64_avx2/stockfish_15.1_win_x64_avx2/stockfish-windows-2022-x86-64-avx2")
        result = engine.play(board, chess.engine.Limit(time=2))
        engine.quit()
        return 1 if result.board().result() == "1-0" else -1 if result.board().result() == "0-1" else 0

    def _uct_value(self, child_node, parent_visit_count):
        return child_node.total_reward / child_node.visit_count + np.sqrt(2 * np.log(parent_visit_count) / child_node.visit_count)

    def _get_state_after_action(self, state, action):
        board = chess.Board()
        board.set_fen(chess.Board.fen(chess.Board(state)))
        board.push(chess.Move.from_uci(action))
        return board.fen()
    def index_to_piece(self, index):
        piece_mapping = {
            1: 'p', 2: 'n', 3: 'b', 4: 'r', 5: 'q', 6: 'k',
            7: 'P', 8: 'N', 9: 'B', 10: 'R', 11: 'Q', 12: 'K'
        }
        return piece_mapping.get(index, ' ')
    def convert_state_to_fen(self, state):
        state_list = state.reshape((8, 8))
        fen_str = ""
        empty_count = 0
        for row in state_list:
            fen_row = ""
            for cell in row:
                if cell == 0:
                    empty_count += 1
                else:
                    if empty_count > 0:
                        fen_row += str(empty_count)
                        empty_count = 0
                    fen_row += self.index_to_piece(cell)
                    fen_row += self.index_to_piece(cell)
            if empty_count > 0:
                fen_row += str(empty_count)
                empty_count = 0
            fen_str += fen_row + "/"
        fen_str = fen_str.rstrip("/")
        fen_str += " w - - 0 1"  # Add the turn information (defaulting to White's turn)
        return fen_str


In [214]:
# Define the chess environment
class ChessEnvironment:
    def __init__(self):
        self.board = chess.Board()

    def get_state(self):
        state = np.zeros(64, dtype=np.uint8)
        for i in range(64):
            piece = self.board.piece_at(i)
            if piece is not None:
                state[i] = self._piece_to_index(piece)
        return state
    def _piece_to_index(self, piece):
        piece_map = {
            chess.PAWN: 1,
            chess.ROOK: 2,
            chess.KNIGHT: 3,
            chess.BISHOP: 4,
            chess.QUEEN: 5,
            chess.KING: 6
        }
        return piece_map[piece.piece_type] * (1 if piece.color == chess.WHITE else -1)

    def step(self, action):
        move = chess.Move.from_uci(action)
        self.board.push(move)
        done = self.board.is_game_over()
        reward = 1 if done and self.board.result() == "1-0" else -1 if done and self.board.result() == "0-1" else 0
        return self.get_state(), reward, done

    def reset(self):
        self.board = chess.Board()
        return self.get_state()

In [215]:
# Define the MCTS node
class MCTSNode:
    def __init__(self, state):
        self.state = state
        self.visit_count = 0
        self.total_reward = 0
        self.children = {}

    def add_child(self, action, child_node):
        self.children[action] = child_node

In [216]:
def train_agent():
    env = ChessEnvironment()
    state_size = 64
    action_size = 4096
    agent = MCTSAgent(state_size, action_size)

    # Initialize the neural network
    model = Sequential()
    model.add(Dense(128, input_dim=state_size, activation='relu'))
    model.add(Dense(action_size, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=Adam())
    episodes = 1000
    for episode in range(episodes):
        state = env.reset()

        done = False
        while not done:
            action = agent.get_action(state)
            next_state, reward, done = env.step(action)

            state = next_state

        print("Episode:", episode + 1)

        # Generate training data
        X_train = []
        y_train = []
        for _ in range(100):  # Generate 100 training samples per episode
            state = env.reset()
            done = False
            while not done:
                action = agent.get_action(state)
                next_state, reward, done = env.step(action)

                X_train.append(state)
                y = np.zeros(action_size)
                y[action] = reward
                y_train.append(y)

                state = next_state

        X_train = np.array(X_train)
        y_train = np.array(y_train)
        # Reshape input data
        X_train = X_train.reshape(-1, state_size)

        # Train the neural network on the entire dataset
        model.fit(X_train, y_train, epochs=1, verbose=0)

    # Save the trained model
    model.save('chess_agent_model.h5')

In [217]:
# Start training the agent
train_agent()

State FEN: nnbbrrqqkkrrbbnn/pppppppppppppppp/8/8/8/8/                /                 w - - 0 1


ValueError: expected 'w' or 'b' for turn part of fen: 'nnbbrrqqkkrrbbnn/pppppppppppppppp/8/8/8/8/                /                 w - - 0 1'