# In [1]:  (Jupyter notebook cell marker left over from the export)
from collections import deque
import random
import chess
import chess.variant
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Input
from tensorflow.compat.v1.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
from IPython.display import display, HTML
import chess.svg
import matplotlib.pyplot as plt
from tqdm import tqdm

# Chess variant: Antichess (games below are played on chess.variant.GiveawayBoard)

def board_to_input_array(board):
    """Encode the pieces on *board* as an 8x8x12 ``uint8`` array.

    For every occupied square ``sq`` exactly one cell is written, at
    ``[sq // 8, sq % 8, channel]``.  ``channel`` is selected by the piece
    symbol: lowercase ``r n b q k p`` map to channels 0-5 and uppercase
    ``R N B Q K P`` to channels 6-11.  The value written is
    ``int(piece.color) + 1``; every other cell stays 0.

    Args:
        board: Object whose ``piece_map()`` returns ``{square: piece}`` and
            whose pieces expose ``symbol()`` and ``color``.

    Returns:
        ``numpy.ndarray`` of shape ``(8, 8, 12)`` and dtype ``uint8``.
    """
    channel_of = dict(zip('rnbqkpRNBQKP', range(12)))
    planes = np.zeros((8, 8, 12), dtype=np.uint8)

    for square, piece in board.piece_map().items():
        channel = channel_of[piece.symbol()]
        row, col = divmod(square, 8)
        planes[row, col, channel] = int(piece.color) + 1
    return planes

def state_to_index(board):
    """Return an integer bucket for *board* derived from its encoded array.

    The board is encoded with ``board_to_input_array``, the raw bytes of that
    array are hashed, and the hash is reduced modulo ``state_space_size[0]``.

    Args:
        board: Position accepted by ``board_to_input_array``.

    Returns:
        An ``int`` in ``range(state_space_size[0])``.

    NOTE(review): ``state_space_size[0]`` is 8 in this file's hyperparameters,
    so distinct positions collide heavily, and Python salts ``hash()`` of
    bytes per process, so the value is not stable across runs.  Confirm that
    callers only need a coarse, in-process bucket.
    """
    board_array = np.asarray(board_to_input_array(board))
    # ndarray.tostring() is a deprecated alias (since NumPy 1.19) of
    # tobytes(); tobytes() yields the same bytes without the warning.
    return hash(board_array.tobytes()) % state_space_size[0]

def choose_action(board, model):
    """Pick a move for the side to move with an epsilon-greedy policy.

    With probability ``exploration_prob`` a uniformly random legal move is
    returned.  Otherwise the model is queried and the best-scoring legal move
    is returned.  Network output ``i`` is read as the score of the ``i``-th
    entry of ``list(board.legal_moves)``, which is the same indexing that
    ``move_to_output_array`` and ``update_q_table`` use.

    Args:
        board: Position to move in; must expose ``legal_moves``.
        model: Object whose ``predict(batch)`` returns one score vector per
            input position.

    Returns:
        One of the legal moves, or ``chess.Move.null()`` when the position
        has no legal move.
    """
    legal_moves_list = list(board.legal_moves)
    # Guard before both branches: previously only the greedy branch checked
    # for an empty move list, so exploration could raise on it.
    if not legal_moves_list:
        return chess.Move.null()

    if np.random.rand() < exploration_prob:
        # Draw an index instead of np.random.choice(list_of_moves) so NumPy
        # never has to coerce Move objects into an array.
        return legal_moves_list[np.random.randint(len(legal_moves_list))]

    q_values = model.predict(np.array([board_to_input_array(board)]))[0]
    # Only the first len(legal_moves_list) outputs correspond to a move.
    # Taking argmax over every output and clamping it afterwards collapsed
    # to "last legal move" whenever the argmax landed past the legal-move
    # count, so restrict the search to the legal indices instead.
    best_move_index = int(np.argmax(q_values[:len(legal_moves_list)]))
    return legal_moves_list[best_move_index]

def move_to_output_array(move, legal_moves):
    """Return a one-hot vector marking *move*'s position in *legal_moves*.

    The vector has ``action_space_size`` entries, all zero except a 1 at the
    index ``list(legal_moves).index(move)``.

    Raises:
        ValueError: If *move* is not contained in *legal_moves*.
    """
    one_hot = np.zeros(action_space_size)
    ordered_moves = list(legal_moves)
    one_hot[ordered_moves.index(move)] = 1
    return one_hot

# Hyperparameters
# -- network input / output sizes --
state_space_size = (8, 8, 12)  # input shape; matches board_to_input_array's output
action_space_size = 4096  # number of network outputs

# -- Q-learning constants --
learning_rate = 0.01  # step size of the Q-value update in update_q_table
discount_factor = 0.99  # weight of the best next-state value in that update
exploration_prob = 0.2  # chance that choose_action plays a random legal move

# -- replay memory; once 10,000 entries are stored the oldest are discarded --
experience_replay_buffer = deque(maxlen=10_000)

# Neural network model (labelled "alpha zero" by the original author).
# Functional-API graph: two 3x3 'same'-padded convolutions with 64 filters
# each, flattened into two 64-unit dense layers, ending in a softmax over
# action_space_size outputs.  input_layer and output_layer are reused by
# create_new_model(); `model` is the network update_q_table() trains.
input_layer = Input(shape=state_space_size)
conv1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_layer)
conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv1)
flatten_layer = Flatten()(conv2)
dense1 = Dense(64, activation='relu')(flatten_layer)
dense2 = Dense(64, activation='relu')(dense1)
output_layer = Dense(action_space_size, activation='softmax')(dense2)
model = Model(inputs=input_layer, outputs=output_layer)
# NOTE(review): the optimizer rate is hard-coded to 0.1 and does not use the
# module-level `learning_rate` hyperparameter - confirm this is intended.
# NOTE(review): update_q_table() fits this softmax / categorical_crossentropy
# head to Q-value targets - confirm the activation and loss are intended.
model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.1), loss=['categorical_crossentropy'], metrics=['accuracy'])

def count_pieces_by_color(board, color):
    """Return how many pieces of *color* are on *board*.

    Adds up ``len(board.pieces(piece_type, color))`` for pawn, knight,
    bishop, rook, queen and king.
    """
    total = 0
    for piece_type in (chess.PAWN, chess.KNIGHT, chess.BISHOP,
                       chess.ROOK, chess.QUEEN, chess.KING):
        total += len(board.pieces(piece_type, color))
    return total

def normalize_input(board):
    """Return the encoded board as ``float16`` with every value divided by 12.

    The encoding comes from ``board_to_input_array``; the result is a new
    array of the same shape.
    """
    scaled = np.asarray(board_to_input_array(board)).astype(np.float16)
    return scaled / 12.0

def update_q_table(state, action, reward, next_state):
    """Record one transition and run one replay-based training step.

    The transition is appended to ``experience_replay_buffer``, up to 8
    stored transitions are sampled, and the module-level ``model`` is trained
    on them.  Each sampled action's target is the model's current prediction
    moved towards ``reward + discount_factor * max(next prediction)`` by
    ``learning_rate``.

    The buffer stores the encoded board arrays themselves.  Previously it
    stored ``state_to_index`` buckets, and every sampled state was rebuilt as
    the starting position, so training never saw the recorded positions.

    Args:
        state: Board before *action* was played; *action* must still be in
            ``state.legal_moves``.
        action: The move that was played from *state*.
        reward: Numeric reward observed for the transition.
        next_state: Board after *action* was played.

    Raises:
        ValueError: If *action* is not a legal move of *state*.

    NOTE(review): the next-state value is bootstrapped even when
    *next_state* is a finished game; consider skipping it for terminal
    positions.
    """
    # Actions are indexed by their position in the legal-move list, which is
    # the same convention choose_action and move_to_output_array use.
    action_index = list(state.legal_moves).index(action)

    experience_replay_buffer.append(
        (board_to_input_array(state), action_index, reward, board_to_input_array(next_state))
    )

    # The buffer holds at least the transition just appended, so the batch
    # is never empty.
    batch_size = min(len(experience_replay_buffer), 8)
    batch = random.sample(experience_replay_buffer, batch_size)

    states = np.array([sample[0] for sample in batch])
    next_states = np.array([sample[3] for sample in batch])
    q_values = model.predict(states)
    next_q_values = model.predict(next_states)

    for i, (_, action_idx, sample_reward, _) in enumerate(batch):
        td_target = sample_reward + discount_factor * np.max(next_q_values[i])
        q_values[i, action_idx] += learning_rate * (td_target - q_values[i, action_idx])

    model.train_on_batch(states, q_values)

def calculate_reward(board):
    """Score *board* from its piece count and a few terminal conditions.

    The reward starts at ``-(32 - number_of_pieces) * 0.1``.  A further 5 is
    subtracted, once, when the board reports stalemate, insufficient
    material, fivefold repetition or the seventy-five-move rule.  The checks
    run in that order and stop at the first one that is true.

    Args:
        board: Object exposing ``piece_map()`` and the four ``is_*`` checks.

    Returns:
        The reward as a number.
    """
    missing_pieces = 32 - len(board.piece_map())
    reward = 0 - missing_pieces * 0.1

    penalised_ending = (
        board.is_stalemate()
        or board.is_insufficient_material()
        or board.is_fivefold_repetition()
        or board.is_seventyfive_moves()
    )
    if penalised_ending:
        reward -= 5
    return reward

def create_new_model():
    """Build and compile an independently initialised copy of the network.

    The architecture is taken from the module-level ``input_layer`` /
    ``output_layer`` graph, and the copy is compiled with the same optimizer,
    loss and metrics as the module-level ``model``.

    Returns:
        A compiled Keras model whose weights are not shared with any other
        model.
    """
    template = Model(inputs=input_layer, outputs=output_layer)
    # Wrapping input_layer/output_layer in a Model reuses the existing layer
    # objects, so every model built that way shares one set of weights with
    # `model` and with every other "new" model.  clone_model re-creates the
    # layers, giving this model its own freshly initialised weights.
    new_model = tf.keras.models.clone_model(template)
    new_model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.1), loss=['categorical_crossentropy'], metrics=['accuracy'])
    return new_model

def train_model_self_play(num_games, model):
    """Play *num_games* games in which *model* controls both sides.

    Each game's result is discarded.

    NOTE(review): play_game only selects and pushes moves, so nothing in
    this function updates the model's weights - confirm whether a training
    step (e.g. update_q_table) was meant to run here.
    """
    for _game in range(num_games):
        play_game(model1=model, model2=model)

def play_game(model1, model2):
    """Play one game on a fresh ``chess.variant.GiveawayBoard``.

    *model1* chooses the moves when it is White's turn and *model2* when it
    is Black's; moves come from ``choose_action``.

    Returns:
        ``board.result()`` once ``board.is_game_over()`` is true.
    """
    board = chess.variant.GiveawayBoard()
    while True:
        if board.is_game_over():
            return board.result()
        mover = model1 if board.turn == chess.WHITE else model2
        board.push(choose_action(board, mover))

def train_new_player(best_player_model, new_player_model, threshold_win_rate=0.55, num_games=200):
    """Play a match between the two models and return the one to keep.

    Despite its name this function only evaluates: it calls ``play_game`` and
    counts results, and no weights are updated.  In each game the new player
    is given White or Black at random; a game counts as a win only when the
    result is decisive in the new player's favour.

    The new player is promoted when it wins at least *threshold_win_rate* of
    the full *num_games* match.  Previously the running win rate was checked
    after every game, so a single opening win (rate 1.0) was enough for
    promotion.  The match still stops early, but only once the outcome can no
    longer change.

    Args:
        best_player_model: Current best model.
        new_player_model: Challenger model.
        threshold_win_rate: Fraction of *num_games* the challenger must win.
        num_games: Length of the match.

    Returns:
        *new_player_model* if it reaches the threshold, otherwise
        *best_player_model*.
    """
    new_player_wins = 0

    # range() is empty for num_games <= 0, so no game is played and the
    # divisions below are never reached.
    for games_played in range(1, num_games + 1):
        if random.choice([True, False]):
            result = play_game(new_player_model, best_player_model)
            new_player_won = result == "1-0"
        else:
            result = play_game(best_player_model, new_player_model)
            new_player_won = result == "0-1"
        if new_player_won:
            new_player_wins += 1

        # Clinched: the wins so far already meet the threshold over the
        # whole match, whatever happens in the remaining games.
        if new_player_wins / num_games >= threshold_win_rate:
            win_rate = new_player_wins / games_played
            print(f"New player has achieved a win rate of {win_rate}. It becomes the best player.")
            return new_player_model

        # Eliminated: even winning every remaining game would fall short.
        games_left = num_games - games_played
        if (new_player_wins + games_left) / num_games < threshold_win_rate:
            break

    print("New player did not achieve the required win rate. Best player remains unchanged.")
    return best_player_model

# Load or create initial best player model.
# Tries to load "best_player.h5"; if load_model raises IOError, a new model is
# built and 200 self-play games are run with it.
try:
    best_player_model = load_model("best_player.h5")
except IOError:
    print("No initial model found. Training a new model.")
    best_player_model = create_new_model()
    # NOTE(review): train_model_self_play only plays games via play_game;
    # confirm whether a weight update was meant to happen during them.
    train_model_self_play(200, best_player_model)

# Main training and updating loop.
# Runs until interrupted.  Each pass builds a candidate with
# create_new_model(), lets train_new_player() choose between it and the
# current best, and saves the chosen model to "best_player.h5".
# NOTE(review): nothing in this loop trains the candidate between its
# creation and its evaluation - confirm this is intended.
while True:
    new_player_model = create_new_model()
    best_player_model = train_new_player(best_player_model, new_player_model)
    best_player_model.save("best_player.h5")
