In [13]:
from collections import deque
import random
import chess
import chess.variant
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten
from tensorflow.compat.v1.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
from IPython.display import display, HTML
import chess.svg
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate
from tqdm import tqdm
from tensorflow.keras.models import Model
import chess.pgn
import numpy as np
from tensorflow.keras.models import load_model

#chess Variant Antichess



def board_to_input_array(board):
    board_array = np.zeros((8, 8, 12), dtype=np.uint8)
    piece_mapping = {'r': 0, 'n': 1, 'b': 2, 'q': 3, 'k': 4, 'p': 5, 'R': 6, 'N': 7, 'B': 8, 'Q': 9, 'K': 10, 'P': 11}
    #normalize piece values

    
    for square, piece in board.piece_map().items():
        piece_type = piece_mapping[piece.symbol()]
        color = int(piece.color)
        board_array[square // 8, square % 8, piece_type] = color + 1  # Use 0 for empty squares

    return board_array


def state_to_index(board):
    board_array = np.array(board_to_input_array(board))
    return hash(board_array.tostring()) % state_space_size[0]



def choose_action(board,model):
    if np.random.rand() < exploration_prob:
        return np.random.choice(list(board.legal_moves))
    else:
        state_index = state_to_index(board)
        legal_moves_list = list(board.legal_moves)
        if not legal_moves_list:
            return chess.Move.null()
        q_values = model.predict(np.array([board_to_input_array(board)]))[0]
        best_move_index = np.argmax(q_values)
        best_move_uci = legal_moves_list[min(best_move_index, len(legal_moves_list)-1)].uci()
        return chess.Move.from_uci(best_move_uci)
    
# Function to convert a move into an output array
def move_to_output_array(move, legal_moves):
    output_array = np.zeros(action_space_size)
    move_index = list(legal_moves).index(move)
    output_array[move_index] = 1
    return output_array



# Hyperparameters
learning_rate = 0.01
discount_factor = 0.99
exploration_prob = 0.2

# Neural Network Architecture
state_space_size = (8, 8, 12)  # 8x8 board with 12 channels (one for each piece type and color)
action_space_size = 4096



# Initialize a deque for experience replay
experience_replay_buffer = deque(maxlen=10000)

# Neural Network Model alpha zero


# Input layer
input_layer = Input(shape=state_space_size)

# Contracting path
# Convolutional layers
conv1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_layer)
conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv1)
flatten_layer = Flatten()(conv2)
dense1 = Dense(64, activation='relu')(flatten_layer)
dense2 = Dense(64, activation='relu')(dense1)
output_layer = Dense(action_space_size, activation='softmax')(dense2)

# Create the model
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.1), loss=['categorical_crossentropy'], metrics=['accuracy'])


def count_pieces_by_color(board, color):
    piece_types = [chess.PAWN, chess.KNIGHT, chess.BISHOP, chess.ROOK, chess.QUEEN, chess.KING]
    return sum(len(board.pieces(piece_type, color)) for piece_type in piece_types)







def normalize_input(board):
    board_array = np.array(board_to_input_array(board), dtype=np.float16)
    board_array /= 12.0  # Assuming the maximum piece type value is 12
    return board_array



def update_q_table(state, action, reward, next_state):
    state_index = state_to_index(state)
    next_state_index = state_to_index(next_state)
    action_index = list(state.legal_moves).index(action)
    


    # Combine the rewards with weights (you can adjust the weights as needed)
    #total_reward = reward + 0.01 * piece_coordination_reward_value
    total_reward = reward

    # Store the experience in the replay buffer
    experience_replay_buffer.append((state_index, action_index, total_reward, next_state_index))

    # Sample a batch from the replay buffer for training
    batch_size = min(len(experience_replay_buffer), 8)
    if batch_size > 0:
        batch = np.array(random.sample(experience_replay_buffer, batch_size))
        states = np.array([board_to_input_array(chess.Board(fen=chess.STARTING_FEN)) for _ in batch[:, 0]])
        next_states = np.array([board_to_input_array(chess.Board(fen=chess.STARTING_FEN)) for _ in batch[:, 3]])
        q_values = model.predict(states)
        next_q_values = model.predict(next_states)
        
        for i in range(batch_size):
            action_idx = int(batch[i, 1])  # Cast to integer
            q_values[i, action_idx] += learning_rate * (
                batch[i, 2] + discount_factor * np.max(next_q_values[i]) - q_values[i, action_idx]
            )
        
        # Train the model on the batch
        model.train_on_batch(states, q_values)


def display_chess_board(board):
    return display(HTML(chess.svg.board(board=board, size=200)))

def play_game():
    board = chess.variant.GiveawayBoard()
    
    game_states = []
    total_reward = 0

    while not board.is_game_over():
        state = board.copy()
        game_states.append(state.copy())

        # Player 1 (White) makes a move
        white_move = choose_action(board, model)
        if white_move in board.legal_moves:
            board.push(white_move)
        else:
            print("Invalid move by White. Try again.")
            continue

        # Update state and check for game end
        if board.is_game_over():
            break

        # Player 2 (Black) makes a move
        black_move = choose_action(board, model)
        if black_move in board.legal_moves:
            board.push(black_move)
        else:
            print("Invalid move by Black. Try again.")
            continue

        next_state = board.copy()

        # Calculate rewards and update Q-table
        reward = calculate_reward(board) # You need to define this function based on your reward strategy
        update_q_table(state, white_move, reward, next_state)

        # Accumulate the reward
        total_reward += reward

        # Next state becomes the current state for the next iteration
        state = next_state

    game_states.append(board.copy())
    return game_states, board.result(), total_reward

    


def calculate_reward(board):
    reward = 0

    # Reward for losing pieces
    piece_count = len(board.piece_map())
    reward -= (32 - piece_count) * 0.1  # Assuming a standard 32-piece setup


    if board.is_stalemate() or board.is_insufficient_material():
        # Penalize for drawing the game
        reward -= 5
    elif board.is_fivefold_repetition() or board.is_seventyfive_moves():
        # Penalize for other types of draws
        reward -= 5

    return reward




log_dir = "logs/"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
num_games = 1000
total_rewards = []
results = {"white_wins": 0, "black_wins": 0, "draws": 0}
outcomes = []
num_off_white_pieces = []
num_off_black_pieces = []

reward_trend = []

for episode in tqdm(range(num_games), desc="Training"):
    exploration_prob *= 0.99  # Decay exploration probability
    print("Game:", episode)
    game_states, result, total_reward = play_game()

    # Append the total reward to the reward trend list
    reward_trend.append(total_reward)

    # Update results based on the game outcome
    if result == "1-0":
        results["white_wins"] += 1
        outcomes.append(1)
    elif result == "0-1":
        results["black_wins"] += 1
        outcomes.append(0)
    elif result == "1/2-1/2":
        results["draws"] += 1
        outcomes.append(0.5)  # Fix here: Append 0.5 for draws

    total_rewards.append(total_reward)
    
    # Usage
    white_pieces = count_pieces_by_color(game_states[-1], chess.WHITE)
    black_pieces = count_pieces_by_color(game_states[-1], chess.BLACK)
    
    num_off_white_pieces.append(white_pieces)
    num_off_black_pieces.append(black_pieces)
    
    # Display the total reward for each game
    print("Total Reward for Game {}: {}".format(episode, total_reward))
    print("Game Outcome:", result)
    #game length
    print("Game Length:", len(game_states))

# Display statistics
average_reward = sum(total_rewards) / num_games
print("Average Total Reward:", average_reward)

# Extract the FEN of the final position
final_position_fen = game_states[-1].fen()
print("Final Position FEN:", final_position_fen)

# Display the last game
for state in game_states:
    display_chess_board(state)

# Display results
print("\nResults Summary:")
print("White Wins:", results["white_wins"])
print("Black Wins:", results["black_wins"])
print("Draws:", results["draws"])

# Plot trend lines
plt.plot(outcomes, label="Game Outcomes")
plt.xlabel("Games")
plt.ylabel("Outcome (1 for White Win, 0 for Draw, 0.5 for Loss)")
plt.legend()
plt.title("Game Outcomes Trend")

plt.figure()
plt.plot(num_off_white_pieces, label="White Pieces")
plt.plot(num_off_black_pieces, label="Black Pieces")
plt.xlabel("Games")
plt.ylabel("Number of Pieces")
plt.legend()
plt.title("Number of Pieces Trend")

# Plot rewards
plt.figure()
plt.plot(reward_trend, label="Rewards")
plt.xlabel("Games")
plt.ylabel("Total Reward")
plt.legend()
plt.title("Reward Trend")

plt.show()

# Save the model

model.save("v1_1k.h5")  # Change the file name as needed



Training:   1%|▏         | 13/1000 [00:28<34:17,  2.08s/it]

Total Reward for Game 12: -36.7
Game Outcome: 1-0
Game Length: 27
Game: 13


Training:   1%|▏         | 14/1000 [00:30<33:14,  2.02s/it]

Total Reward for Game 13: -29.900000000000006
Game Outcome: 0-1
Game Length: 27
Game: 14


Training:   2%|▏         | 15/1000 [00:33<36:30,  2.22s/it]

Total Reward for Game 14: -62.5
Game Outcome: 0-1
Game Length: 38
Game: 15


Training:   2%|▏         | 16/1000 [00:35<35:48,  2.18s/it]

Total Reward for Game 15: -40.400000000000006
Game Outcome: 0-1
Game Length: 28
Game: 16


Training:   2%|▏         | 17/1000 [00:37<36:11,  2.21s/it]

Total Reward for Game 16: -44.400000000000006
Game Outcome: 0-1
Game Length: 30
Game: 17


Training:   2%|▏         | 18/1000 [00:40<37:12,  2.27s/it]

Total Reward for Game 17: -58.4
Game Outcome: 0-1
Game Length: 34
Game: 18


Training:   2%|▏         | 19/1000 [00:42<35:08,  2.15s/it]

Total Reward for Game 18: -32.6
Game Outcome: 0-1
Game Length: 26
Game: 19


Training:   2%|▏         | 20/1000 [00:44<34:01,  2.08s/it]

Total Reward for Game 19: -32.3
Game Outcome: 0-1
Game Length: 27
Game: 20


Training:   2%|▏         | 21/1000 [00:46<35:37,  2.18s/it]

Total Reward for Game 20: -50.0
Game Outcome: 0-1
Game Length: 34
Game: 21


Training:   2%|▏         | 22/1000 [00:48<36:36,  2.25s/it]

Total Reward for Game 21: -50.7
Game Outcome: 0-1
Game Length: 32
Game: 22


Training:   2%|▏         | 23/1000 [00:50<35:49,  2.20s/it]

Total Reward for Game 22: -46.5
Game Outcome: 1-0
Game Length: 30
Game: 23


Training:   2%|▏         | 24/1000 [00:53<38:02,  2.34s/it]

Total Reward for Game 23: -58.20000000000001
Game Outcome: 1-0
Game Length: 35
Game: 24


Training:   2%|▎         | 25/1000 [00:56<39:18,  2.42s/it]

Total Reward for Game 24: -55.8
Game Outcome: 0-1
Game Length: 34
Game: 25


Training:   3%|▎         | 26/1000 [00:58<38:52,  2.39s/it]

Total Reward for Game 25: -53.7
Game Outcome: 1-0
Game Length: 32
Game: 26


Training:   3%|▎         | 27/1000 [01:01<39:46,  2.45s/it]

Total Reward for Game 26: -54.7
Game Outcome: 1-0
Game Length: 34
Game: 27


Training:   3%|▎         | 28/1000 [01:03<40:18,  2.49s/it]

Total Reward for Game 27: -54.49999999999999
Game Outcome: 0-1
Game Length: 35
Game: 28


Training:   3%|▎         | 29/1000 [01:06<40:06,  2.48s/it]

Total Reward for Game 28: -34.4
Game Outcome: 0-1
Game Length: 28
Game: 29


Training:   3%|▎         | 30/1000 [01:09<42:06,  2.61s/it]

Total Reward for Game 29: -61.90000000000001
Game Outcome: 0-1
Game Length: 37
Game: 30


Training:   3%|▎         | 31/1000 [01:11<41:37,  2.58s/it]

Total Reward for Game 30: -56.29999999999999
Game Outcome: 0-1
Game Length: 35
Game: 31


Training:   3%|▎         | 32/1000 [01:13<37:38,  2.33s/it]

Total Reward for Game 31: -33.2
Game Outcome: 1-0
Game Length: 23
Game: 32


Training:   3%|▎         | 33/1000 [01:15<37:22,  2.32s/it]

Total Reward for Game 32: -46.80000000000001
Game Outcome: 0-1
Game Length: 31
Game: 33


Training:   3%|▎         | 34/1000 [01:17<33:30,  2.08s/it]

Total Reward for Game 33: -28.6
Game Outcome: 0-1
Game Length: 22
Game: 34


Training:   4%|▎         | 35/1000 [01:19<36:23,  2.26s/it]

Total Reward for Game 34: -50.699999999999996
Game Outcome: 0-1
Game Length: 38
Game: 35


Training:   4%|▎         | 36/1000 [01:21<35:19,  2.20s/it]

Total Reward for Game 35: -39.300000000000004
Game Outcome: 1-0
Game Length: 27
Game: 36


Training:   4%|▎         | 37/1000 [01:24<39:16,  2.45s/it]

Total Reward for Game 36: -70.69999999999999
Game Outcome: 1/2-1/2
Game Length: 42
Game: 37


Training:   4%|▍         | 38/1000 [01:27<37:21,  2.33s/it]

Total Reward for Game 37: -33.1
Game Outcome: 0-1
Game Length: 28
Game: 38


Training:   4%|▍         | 39/1000 [01:28<33:35,  2.10s/it]

Total Reward for Game 38: -25.9
Game Outcome: 0-1
Game Length: 22
Game: 39


Training:   4%|▍         | 40/1000 [01:30<35:06,  2.19s/it]

Total Reward for Game 39: -48.099999999999994
Game Outcome: 0-1
Game Length: 33
Game: 40


Training:   4%|▍         | 41/1000 [01:33<35:18,  2.21s/it]

Total Reward for Game 40: -41.8
Game Outcome: 1-0
Game Length: 31
Game: 41


Training:   4%|▍         | 42/1000 [01:35<33:49,  2.12s/it]

Total Reward for Game 41: -31.100000000000005
Game Outcome: 0-1
Game Length: 26
Game: 42


Training:   4%|▍         | 43/1000 [01:37<36:15,  2.27s/it]

Total Reward for Game 42: -62.1
Game Outcome: 1-0
Game Length: 36
Game: 43


Training:   4%|▍         | 44/1000 [01:40<36:55,  2.32s/it]

Total Reward for Game 43: -48.1
Game Outcome: 0-1
Game Length: 31
Game: 44


Training:   4%|▍         | 45/1000 [01:41<34:15,  2.15s/it]

Total Reward for Game 44: -29.900000000000002
Game Outcome: 0-1
Game Length: 24
Game: 45


Training:   5%|▍         | 46/1000 [01:44<35:30,  2.23s/it]

Total Reward for Game 45: -49.3
Game Outcome: 0-1
Game Length: 34
Game: 46


Training:   5%|▍         | 47/1000 [01:45<31:48,  2.00s/it]

Total Reward for Game 46: -22.799999999999997
Game Outcome: 0-1
Game Length: 21
Game: 47


Training:   5%|▍         | 48/1000 [01:48<34:47,  2.19s/it]

Total Reward for Game 47: -49.0
Game Outcome: 1-0
Game Length: 31
Game: 48


Training:   5%|▍         | 49/1000 [01:50<34:19,  2.17s/it]

Total Reward for Game 48: -39.6
Game Outcome: 0-1
Game Length: 28
Game: 49


Training:   5%|▌         | 50/1000 [01:54<43:06,  2.72s/it]

Total Reward for Game 49: -100.60000000000005
Game Outcome: 1/2-1/2
Game Length: 50
Game: 50


Training:   5%|▌         | 51/1000 [01:56<39:17,  2.48s/it]

Total Reward for Game 50: -33.10000000000001
Game Outcome: 0-1
Game Length: 25
Game: 51


Training:   5%|▌         | 52/1000 [01:58<35:34,  2.25s/it]

Total Reward for Game 51: -31.900000000000006
Game Outcome: 1-0
Game Length: 22
Game: 52


Training:   5%|▌         | 53/1000 [01:59<33:13,  2.10s/it]

Total Reward for Game 52: -25.8
Game Outcome: 0-1
Game Length: 24
Game: 53


Training:   5%|▌         | 54/1000 [02:01<32:19,  2.05s/it]

Total Reward for Game 53: -36.70000000000001
Game Outcome: 0-1
Game Length: 27
Game: 54


Training:   6%|▌         | 55/1000 [02:03<30:24,  1.93s/it]

Total Reward for Game 54: -20.700000000000003
Game Outcome: 0-1
Game Length: 22
Game: 55


Training:   6%|▌         | 56/1000 [02:05<29:41,  1.89s/it]

Total Reward for Game 55: -33.00000000000001
Game Outcome: 1-0
Game Length: 24
Game: 56


Training:   6%|▌         | 57/1000 [02:07<28:59,  1.84s/it]

Total Reward for Game 56: -28.000000000000004
Game Outcome: 0-1
Game Length: 25
Game: 57


Training:   6%|▌         | 58/1000 [02:09<32:41,  2.08s/it]

Total Reward for Game 57: -62.60000000000001
Game Outcome: 1-0
Game Length: 36
Game: 58


Training:   6%|▌         | 59/1000 [02:11<31:10,  1.99s/it]

Total Reward for Game 58: -28.800000000000004
Game Outcome: 1-0
Game Length: 23
Game: 59


Training:   6%|▌         | 60/1000 [02:14<36:55,  2.36s/it]

Total Reward for Game 59: -79.00000000000003
Game Outcome: 1/2-1/2
Game Length: 43
Game: 60


Training:   6%|▌         | 61/1000 [02:16<36:27,  2.33s/it]

Total Reward for Game 60: -49.3
Game Outcome: 1-0
Game Length: 30
Game: 61


Training:   6%|▌         | 62/1000 [02:19<37:21,  2.39s/it]

Total Reward for Game 61: -43.099999999999994
Game Outcome: 0-1
Game Length: 33
Game: 62


Training:   6%|▋         | 63/1000 [02:21<35:14,  2.26s/it]

Total Reward for Game 62: -26.900000000000006
Game Outcome: 0-1
Game Length: 26
Game: 63


Training:   6%|▋         | 64/1000 [02:23<34:44,  2.23s/it]

Total Reward for Game 63: -42.500000000000014
Game Outcome: 0-1
Game Length: 29
Game: 64


Training:   6%|▋         | 65/1000 [02:27<41:08,  2.64s/it]

Total Reward for Game 64: -89.00000000000003
Game Outcome: 0-1
Game Length: 47
Game: 65


Training:   7%|▋         | 66/1000 [02:29<39:30,  2.54s/it]

Total Reward for Game 65: -45.0
Game Outcome: 0-1
Game Length: 30
Game: 66


Training:   7%|▋         | 67/1000 [02:33<44:09,  2.84s/it]

Total Reward for Game 66: -73.89999999999999
Game Outcome: 0-1
Game Length: 40
Game: 67


Training:   7%|▋         | 68/1000 [02:35<40:19,  2.60s/it]

Total Reward for Game 67: -33.800000000000004
Game Outcome: 0-1
Game Length: 26
Game: 68


Training:   7%|▋         | 69/1000 [02:37<41:20,  2.66s/it]

Total Reward for Game 68: -57.400000000000006
Game Outcome: 0-1
Game Length: 37
Game: 69


Training:   7%|▋         | 70/1000 [02:39<37:42,  2.43s/it]

Total Reward for Game 69: -31.300000000000004
Game Outcome: 0-1
Game Length: 26
Game: 70


Training:   7%|▋         | 71/1000 [02:41<35:53,  2.32s/it]

Total Reward for Game 70: -41.2
Game Outcome: 1-0
Game Length: 28
Game: 71


Training:   7%|▋         | 72/1000 [02:43<32:39,  2.11s/it]

Total Reward for Game 71: -29.300000000000004
Game Outcome: 1-0
Game Length: 22
Game: 72


Training:   7%|▋         | 73/1000 [02:45<30:57,  2.00s/it]

Total Reward for Game 72: -31.4
Game Outcome: 0-1
Game Length: 25
Game: 73


Training:   7%|▋         | 74/1000 [02:46<29:03,  1.88s/it]

Total Reward for Game 73: -32.70000000000001
Game Outcome: 1-0
Game Length: 22
Game: 74


Training:   8%|▊         | 75/1000 [02:49<30:40,  1.99s/it]

Total Reward for Game 74: -42.80000000000001
Game Outcome: 0-1
Game Length: 30
Game: 75


Training:   8%|▊         | 76/1000 [02:52<37:19,  2.42s/it]

Total Reward for Game 75: -91.20000000000005
Game Outcome: 1/2-1/2
Game Length: 46
Game: 76


Training:   8%|▊         | 77/1000 [02:54<36:39,  2.38s/it]

Total Reward for Game 76: -43.300000000000004
Game Outcome: 0-1
Game Length: 29
Game: 77


Training:   8%|▊         | 78/1000 [02:57<37:23,  2.43s/it]

Total Reward for Game 77: -56.70000000000002
Game Outcome: 0-1
Game Length: 34
Game: 78


Training:   8%|▊         | 79/1000 [02:59<34:18,  2.23s/it]

Total Reward for Game 78: -32.70000000000001
Game Outcome: 1-0
Game Length: 24
Game: 79


Training:   8%|▊         | 80/1000 [03:01<33:44,  2.20s/it]

Total Reward for Game 79: -40.900000000000006
Game Outcome: 1-0
Game Length: 29
Game: 80


Training:   8%|▊         | 81/1000 [03:03<33:33,  2.19s/it]

Total Reward for Game 80: -40.4
Game Outcome: 0-1
Game Length: 30
Game: 81


Training:   8%|▊         | 82/1000 [03:05<31:08,  2.04s/it]

Total Reward for Game 81: -23.1
Game Outcome: 0-1
Game Length: 22
Game: 82


Training:   8%|▊         | 83/1000 [03:07<32:36,  2.13s/it]

Total Reward for Game 82: -50.8
Game Outcome: 0-1
Game Length: 32
Game: 83


Training:   8%|▊         | 84/1000 [03:09<34:13,  2.24s/it]

Total Reward for Game 83: -50.99999999999999
Game Outcome: 1-0
Game Length: 33
Game: 84


Training:   8%|▊         | 85/1000 [03:11<31:27,  2.06s/it]

Total Reward for Game 84: -32.400000000000006
Game Outcome: 1-0
Game Length: 22
Game: 85


Training:   9%|▊         | 86/1000 [03:14<34:19,  2.25s/it]

Total Reward for Game 85: -44.4
Game Outcome: 0-1
Game Length: 31
Game: 86


Training:   9%|▊         | 87/1000 [03:16<31:59,  2.10s/it]

Total Reward for Game 86: -29.000000000000007
Game Outcome: 0-1
Game Length: 24
Game: 87


Training:   9%|▉         | 88/1000 [03:18<32:11,  2.12s/it]

Total Reward for Game 87: -40.4
Game Outcome: 0-1
Game Length: 29
Game: 88


Training:   9%|▉         | 89/1000 [03:20<34:09,  2.25s/it]

Total Reward for Game 88: -57.40000000000001
Game Outcome: 1-0
Game Length: 34
Game: 89


Training:   9%|▉         | 90/1000 [03:22<33:53,  2.23s/it]

Total Reward for Game 89: -40.8
Game Outcome: 0-1
Game Length: 29
Game: 90


Training:   9%|▉         | 91/1000 [03:24<31:33,  2.08s/it]

Total Reward for Game 90: -27.1
Game Outcome: 0-1
Game Length: 24
Game: 91


Training:   9%|▉         | 92/1000 [03:28<39:09,  2.59s/it]

Total Reward for Game 91: -96.80000000000003
Game Outcome: 0-1
Game Length: 50
Game: 92


Training:   9%|▉         | 93/1000 [03:30<35:06,  2.32s/it]

Total Reward for Game 92: -32.400000000000006
Game Outcome: 1-0
Game Length: 22
Game: 93


Training:   9%|▉         | 94/1000 [03:33<38:53,  2.58s/it]

Total Reward for Game 93: -88.00000000000004
Game Outcome: 1/2-1/2
Game Length: 41
Game: 94


Training:  10%|▉         | 95/1000 [03:35<38:10,  2.53s/it]

Total Reward for Game 94: -51.80000000000001
Game Outcome: 1-0
Game Length: 31
Game: 95


Training:  10%|▉         | 96/1000 [03:37<34:45,  2.31s/it]

Total Reward for Game 95: -26.5
Game Outcome: 1-0
Game Length: 23
Game: 96


Training:  10%|▉         | 97/1000 [03:39<31:01,  2.06s/it]

Total Reward for Game 96: -20.1
Game Outcome: 0-1
Game Length: 20
Game: 97


Training:  10%|▉         | 98/1000 [03:40<30:16,  2.01s/it]

Total Reward for Game 97: -36.7
Game Outcome: 1-0
Game Length: 25
Game: 98


Training:  10%|▉         | 99/1000 [03:43<32:20,  2.15s/it]

Total Reward for Game 98: -50.599999999999994
Game Outcome: 0-1
Game Length: 33
Game: 99


Training:  10%|█         | 100/1000 [03:45<31:10,  2.08s/it]

Total Reward for Game 99: -35.800000000000004
Game Outcome: 1-0
Game Length: 25
Game: 100


Training:  10%|█         | 101/1000 [03:47<32:48,  2.19s/it]

Total Reward for Game 100: -53.99999999999999
Game Outcome: 0-1
Game Length: 33
Game: 101


Training:  10%|█         | 102/1000 [03:49<32:34,  2.18s/it]

Total Reward for Game 101: -45.80000000000002
Game Outcome: 0-1
Game Length: 30
Game: 102


Training:  10%|█         | 103/1000 [03:52<32:24,  2.17s/it]

Total Reward for Game 102: -43.70000000000001
Game Outcome: 1-0
Game Length: 28
Game: 103


Training:  10%|█         | 104/1000 [03:54<32:26,  2.17s/it]

Total Reward for Game 103: -40.00000000000001
Game Outcome: 0-1
Game Length: 29
Game: 104


Training:  10%|█         | 105/1000 [03:58<42:33,  2.85s/it]

Total Reward for Game 104: -101.10000000000005
Game Outcome: 0-1
Game Length: 52
Game: 105


Training:  11%|█         | 106/1000 [04:00<36:37,  2.46s/it]

Total Reward for Game 105: -27.500000000000007
Game Outcome: 1-0
Game Length: 20
Game: 106


Training:  11%|█         | 107/1000 [04:03<39:22,  2.65s/it]

Total Reward for Game 106: -59.40000000000002
Game Outcome: 0-1
Game Length: 37
Game: 107


Training:  11%|█         | 108/1000 [04:06<39:39,  2.67s/it]

Total Reward for Game 107: -54.800000000000004
Game Outcome: 1-0
Game Length: 32
Game: 108


Training:  11%|█         | 109/1000 [04:08<37:02,  2.49s/it]

Total Reward for Game 108: -35.10000000000001
Game Outcome: 0-1
Game Length: 27
Game: 109


Training:  11%|█         | 109/1000 [04:09<33:57,  2.29s/it]


KeyboardInterrupt: 