In [5]:
import chess
import numpy as np
import concurrent.futures
import time
from IPython.display import clear_output

from mcts import mcts
from nn import *     

In [9]:
# Create chess board and get input tensor
board = chess.Board()
input_tensor = board_to_input(board)

chess_model = create_chess_model()
chess_model.summary()
chess_model.evaluate = evaluate.__get__(chess_model)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 8, 8, 12)]   0           []                               
                                                                                                  
 conv2d (Conv2D)                (None, 8, 8, 256)    27904       ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 8, 8, 256)   1024        ['conv2d[0][0]']                 
 alization)                                                                                       
                                                                                                  
 conv2d_1 (Conv2D)              (None, 8, 8, 256)    590080      ['batch_normalization[0][0]']

In [11]:
# Create separate models for black and white players
white_chess_model = create_chess_model()
black_chess_model = create_chess_model()

white_weights_file = "/content/drive/MyDrive/deep_chess/weights/white_chess_model_weights.h5"
black_weights_file = "/content/drive/MyDrive/deep_chess/weights/black_chess_model_weights.h5"

try:
    # Try loading the saved weights for the white and black models
    white_chess_model.load_weights(white_weights_file)
    black_chess_model.load_weights(black_weights_file)
except:
    # If the weights file does not exist, do nothing
    pass

# Define the optimizer and loss functions for the chess models
optimizer = 'adam'
loss = 'categorical_crossentropy'

# Compile the models with the defined optimizer and loss
white_chess_model.compile(optimizer=optimizer, loss=loss)
black_chess_model.compile(optimizer=optimizer, loss=loss)

start_time = time.time()
save_interval = 10

def play_game(game_number, white_model, black_model, num_games, white_score, black_score,draws,num_simulations=100, verbose=False, time_per_move=2):
    board = chess.Board()
    running_reward_sum = 0

    white_rewards = 0
    black_rewards = 0

    while not board.is_game_over():
        if verbose:
            clear_output(wait=True)
            print(f"Game {game_number + 1} out of {num_games}. Reward total: {running_reward_sum}")
            print(f"White wins: {white_score[0]}, {white_rewards} | Black wins: {black_score[0]}, {black_rewards} | Draws: {draws[0]}")
            elapsed_time = (time.time() - start_time)/60
            print(f"Elapsed time: {elapsed_time:.2f}m")
            print(board)

        if board.turn == chess.WHITE:
            move, rewards = mcts(board, white_model, num_simulations=num_simulations)
            white_rewards += rewards
        else:
            move, rewards = mcts(board, black_model, num_simulations=num_simulations)
            black_rewards += rewards
        
        board.push(move)
        running_reward_sum += rewards

    result = board.result()

    return result, white_rewards, black_rewards


def run_multiple_games(white_chess_model, black_chess_model, num_games, num_simulations=100, num_threads=4, time_per_move=2):
    game_results = []
    white_score = [0] 
    black_score = [0]
    draws = [0] 

    white_rewards_list = []
    black_rewards_list = []

    completed_games = 0

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [executor.submit(play_game, i, white_chess_model, black_chess_model, num_games, white_score, black_score, draws,num_simulations, True, time_per_move) for i in range(num_games)]

        for future in concurrent.futures.as_completed(futures):
            result, white_rewards, black_rewards = future.result()            
            game_results.append(result)
            white_rewards_list.append(white_rewards)
            black_rewards_list.append(black_rewards)

            if result == "1-0":
                white_score[0] += 1  
            elif result == "0-1":
                black_score[0] += 1 
            elif result == "1/2-1/2":
                draws[0] += 1
            
            completed_games += 1
            # Save at steady intervals
            if completed_games % save_interval == 0:
              white_chess_model.save_weights(white_weights_file)
              black_chess_model.save_weights(black_weights_file)
            
    white_rewards_array = np.array(white_rewards_list)
    black_rewards_array = np.array(black_rewards_list)
    results_array = np.array([1 if r == "1-0" else -1 if r == "0-1" else 0 for r in game_results])

    white_rewards_correlation = np.corrcoef(white_rewards_array, results_array)[0, 1]
    black_rewards_correlation = np.corrcoef(black_rewards_array, results_array)[0, 1]

    print(f"Correlation between white rewards and game outcomes: {white_rewards_correlation}")
    print(f"Correlation between black rewards and game outcomes: {black_rewards_correlation}")

    return game_results, white_score[0], black_score[0], draws[0], white_rewards_correlation, black_rewards_correlation

num_games = 50
game_results = run_multiple_games(white_chess_model, black_chess_model, num_games, num_simulations=50, num_threads=2, time_per_move=0.5)

# Save the weights of the trained models
white_chess_model.save_weights(white_weights_file)
black_chess_model.save_weights(white_weights_file)

Game 50 out of 50. Reward total: -4453
White wins: 0, -2643 | Black wins: 0, -1810 | Draws: 49
Elapsed time: 96.26m
r . b q . k n r
. p p p . p p p
n . . . . . . .
p . . . p . . .
Q . . . . . . P
b . P P . . . .
P P . N P P P .
R . B . K B N R


  c /= stddev[:, None]


Correlation between white rewards and game outcomes: nan
Correlation between black rewards and game outcomes: nan
