## Game environment functions

In [151]:
#Game Environment Utilities
import numpy as np
import copy
import time

# --- Constants ---
# Board dimensions: the grid is ROWS high and COLS wide.
ROWS = 6
COLS = 7
# Integer codes stored in each board cell.
EMPTY = 0
PLAYER1_PIECE = 1
PLAYER2_PIECE = 2

# --- Board Functions ---
def create_board():
    """Return a fresh ROWS x COLS integer array in which every cell is 0 (EMPTY)."""
    shape = (ROWS, COLS)
    return np.zeros(shape, dtype=int)

def print_board(board):
    """Render the board on stdout with row 0 drawn at the bottom.

    Cells are shown as " " (EMPTY), "X" (player 1) or "O" (player 2). A border
    line and 1-based column labels are printed underneath the grid.
    """
    symbols = {EMPTY: " ", PLAYER1_PIECE: "X", PLAYER2_PIECE: "O"}

    # Row 0 is the lowest row of the grid, so reverse the row order for display.
    top_down = np.flip(board, 0)
    for r in range(ROWS):
        cells = [symbols[top_down[r][c]] for c in range(COLS)]
        print("| " + " ".join(cells) + " |")

    # The border spans the same width as a printed row.
    border = "+" + "-" * (COLS * 2 + 1) + "+"
    print(border)

    # 1-based column labels, indented to line up under the cells.
    labels = [str(number) for number in range(1, COLS + 1)]
    print("  " + " ".join(labels))

def is_valid_location(board, col):
    """Return whether `col` lies on the board and its topmost cell is still EMPTY."""
    if not (0 <= col < COLS):
        return False
    top_cell = board[ROWS - 1][col]
    return top_cell == EMPTY

def get_next_open_row(board, col):
    """Return the first EMPTY row index in `col`, scanning up from row 0, or None if the column is full."""
    open_rows = (r for r in range(ROWS) if board[r][col] == EMPTY)
    return next(open_rows, None)

def drop_piece(board, row, col, piece):
    """Write `piece` into cell (row, col) of `board`, mutating the board in place."""
    target_row = board[row]
    target_row[col] = piece

def get_valid_locations(board):
    """Collect, in ascending order, every column index that can still accept a piece."""
    playable = []
    for col in range(COLS):
        if is_valid_location(board, col):
            playable.append(col)
    return playable

# --- Winning Condition Logic ---
def winning_move(board, piece):
    """Return True if `piece` fills four consecutive cells along any line.

    Lines are checked horizontally, vertically and along both diagonals.
    """
    # (row step, column step) for: horizontal, vertical, rising and falling diagonals.
    directions = ((0, 1), (1, 0), (1, 1), (-1, 1))

    for d_row, d_col in directions:
        for r in range(ROWS):
            for c in range(COLS):
                last_r = r + 3 * d_row
                last_c = c + 3 * d_col
                # Skip starting cells whose fourth cell would fall off the board.
                if not (0 <= last_r < ROWS and 0 <= last_c < COLS):
                    continue
                if all(board[r + k * d_row][c + k * d_col] == piece for k in range(4)):
                    return True

    return False

def is_terminal_node(board):
    """Return True when either player has four in a row or no column can accept a piece."""
    for piece in (PLAYER1_PIECE, PLAYER2_PIECE):
        if winning_move(board, piece):
            return True
    # No winner: the game is over only if the board is full.
    return not get_valid_locations(board)

print("Game Environment Utilities Loaded.")

Game Environment Utilities Loaded.


## Player Base Class & Human Player

In [153]:
# Player Base Class & Human Player
from abc import ABC, abstractmethod

class Player(ABC):
    """Common interface that every Connect 4 player implements."""

    def __init__(self, player_id):
        # Identifier of this player; subclasses use it as the piece value they drop.
        self.player_id = player_id

    @abstractmethod
    def get_move(self, board):
        """
        Choose the column for this player's next move.

        Args:
            board (np.ndarray): The current 6x7 game board.

        Returns:
            int: The column index (0-6) for the move.
        """

class HumanPlayer(Player):
    """A player controlled by human input via the console (accepts 1-7)."""

    def get_move(self, board):
        """Prompt on the console until the user enters a playable column.

        Columns are shown to and read from the user as 1-based numbers and are
        converted to the 0-based index used by the board functions.

        Args:
            board (np.ndarray): The current game board.

        Returns:
            int | None: The 0-based column index chosen, or None when no column
            can accept a piece.

        Raises:
            Exception: Any error raised by `input()` itself (for example
                EOFError when stdin is closed) propagates to the caller.
                Only unparsable numbers are retried; retrying a broken stdin
                would loop forever.
        """
        valid_locations_zero_based = get_valid_locations(board)  # 0-based columns
        valid_locations_display = [loc + 1 for loc in valid_locations_zero_based]

        if not valid_locations_display:
            print("Error: No valid moves available!")
            return None

        prompt = f"Player {self.player_id}, choose column ({', '.join(map(str, valid_locations_display))}): "
        while True:
            # Deliberately outside the try: a failing input() must not be retried.
            col_str = input(prompt)
            try:
                user_col = int(col_str)
            except ValueError:
                print("Invalid input. Please enter a number.")
                continue

            # Convert the user's 1-based answer to the internal 0-based index.
            internal_col = user_col - 1
            if internal_col in valid_locations_zero_based:
                return internal_col
            print(f"Invalid column {user_col}. Please choose from {valid_locations_display}.")

print("Player Base Class and HumanPlayer Loaded.")

Player Base Class and HumanPlayer Loaded.


## CNN Model Training & Saving

In [6]:
# CNN Model Training & Saving
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split

print("TensorFlow Version:", tf.__version__)

# --- Data Loading and Preparation ---
# The whole pipeline (load -> reshape -> split -> train -> evaluate -> save) runs
# inside one try block; any failure is reduced to a one-line message below.
try:
    df = pd.read_csv("c4_game_database.csv") # Data from Kaggle
    # The column named '42' holds the outcome label; rows without a label are dropped.
    df = df.rename(columns={'42': 'winner'})
    df = df.dropna(subset=['winner'])

    # The first 42 columns (ROWS * COLS) are the board cells.
    X = df.iloc[:, :42].values
    y = df['winner'].values

    # Reshape features to 6x7 boards
    # Replace -1 with 2 for Player 2 pieces
    X[X == -1] = PLAYER2_PIECE
    # NOTE(review): this assumes the CSV lists cells row by row in the same order the
    # game code uses (row 0 = bottom row, see get_next_open_row). Confirm against the
    # dataset, because CNNMinimaxPlayer later feeds live game boards to this model.
    X = X.reshape(-1, ROWS, COLS)

    # Target variable: Assuming winner is 1 (Player 1), -1 (Player 2), 0 (Draw) in CSV
    # Check unique values in y to confirm mapping
    print("Original unique y values:", np.unique(y))
    # Labels are remapped to 0/1/2 so they can be used as class indices by the
    # sparse categorical loss and the 3-unit softmax output.
    y_mapped = np.copy(y)
    y_mapped[y == 1] = 1  # Player 1 Win remains 1
    y_mapped[y == -1] = 2 # Player 2 Win becomes 2
    y_mapped[y == 0] = 0  # Draw remains 0 
    print("Mapped unique y values:", np.unique(y_mapped))
    y = y_mapped

    print("Shape of X:", X.shape)
    print("Shape of y:", y.shape)

    # Data Splitting
    # 80/20 split with a fixed seed; stratify=y keeps the class proportions in both parts.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) 

    # Reshape for CNN (add channel dimension)
    X_train_cnn = X_train.reshape(-1, ROWS, COLS, 1)
    X_test_cnn = X_test.reshape(-1, ROWS, COLS, 1)

    print("X_train_cnn shape:", X_train_cnn.shape)
    print("y_train shape:", y_train.shape)


    # --- CNN Model Definition ---
    # Two conv + pool stages followed by a dense classifier over 3 classes
    # (index 0 = draw, 1 = Player 1 win, 2 = Player 2 win, per the mapping above).
    # NOTE(review): with Keras' default pooling settings the two 2x2 pools should
    # shrink the 6x7 grid to 3x3 and then 1x1 -- confirm in the summary output.
    cnn_model = Sequential([
        Conv2D(32, (3, 3), activation='relu', padding='SAME', input_shape=(ROWS, COLS, 1)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu', padding='SAME'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(3, activation='softmax')
    ])

    # Compile the model
    # The sparse loss takes the integer class labels directly (no one-hot encoding).
    cnn_model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

    cnn_model.summary()

    # --- Model Training ---
    print("\n--- Training CNN Model ---")

    # NOTE(review): the held-out split is passed as validation data here and is
    # also the split evaluated below, so validation and "test" metrics come from
    # the same samples.
    history = cnn_model.fit(X_train_cnn, y_train,
                            epochs=10, # Adjust epochs as needed
                            batch_size=32,
                            validation_data=(X_test_cnn, y_test))

    print("\n--- Evaluating CNN Model ---")
    loss, accuracy = cnn_model.evaluate(X_test_cnn, y_test, verbose=0)
    print(f"Test Loss: {loss:.4f}")
    print(f"Test Accuracy: {accuracy:.4f}")

    # --- SAVING THE MODEL ---
    # This path is the default model_path that CNNMinimaxPlayer loads.
    model_save_path = 'connect4_cnn_model.h5' # Keras H5 format
    cnn_model.save(model_save_path)
    print(f"\nCNN Model saved to {model_save_path}")

except FileNotFoundError:
    print("Error: c4_game_database.csv not found. Cannot train CNN model.")
except Exception as e:
    # Any other failure is reported as a single line; the traceback is not shown.
    print(f"An error occurred during CNN training: {e}")

TensorFlow Version: 2.18.0
Original unique y values: [-1.  0.  1.]
Mapped unique y values: [0. 1. 2.]
Shape of X: (376619, 6, 7)
Shape of y: (376619,)
X_train_cnn shape: (301295, 6, 7, 1)
y_train shape: (301295,)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



--- Training CNN Model ---
Epoch 1/10
[1m9416/9416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4ms/step - accuracy: 0.8152 - loss: 0.3846 - val_accuracy: 0.9284 - val_loss: 0.1767
Epoch 2/10
[1m9416/9416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 4ms/step - accuracy: 0.9266 - loss: 0.1852 - val_accuracy: 0.9480 - val_loss: 0.1333
Epoch 3/10
[1m9416/9416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 4ms/step - accuracy: 0.9467 - loss: 0.1380 - val_accuracy: 0.9570 - val_loss: 0.1111
Epoch 4/10
[1m9416/9416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 4ms/step - accuracy: 0.9564 - loss: 0.1131 - val_accuracy: 0.9656 - val_loss: 0.0901
Epoch 5/10
[1m9416/9416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 4ms/step - accuracy: 0.9628 - loss: 0.0985 - val_accuracy: 0.9672 - val_loss: 0.0872
Epoch 6/10
[1m9416/9416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 4ms/step - accuracy: 0.9681 - loss: 0.0844 - val_accuracy: 0.9711 -



Test Loss: 0.0590
Test Accuracy: 0.9784

CNN Model saved to connect4_cnn_model.h5


## AI Player - CNN-Minimax Player

In [155]:
# CNN-Minimax AI Player Implementation
import tensorflow as tf
import numpy as np
import random
import math
import time

# --- AI Player using CNN and Minimax ---
class CNNMinimaxPlayer(Player):
    """Connect 4 player that runs alpha-beta Minimax and scores leaf positions with a CNN."""

    def __init__(self, player_id, model_path='connect4_cnn_model.h5', search_depth=4):
        """
        Set up the player and try to load the evaluation network.

        Args:
            player_id (int): PLAYER1_PIECE or PLAYER2_PIECE.
            model_path (str): Path to the saved Keras model file.
            search_depth (int): The depth for the Minimax search.

        If loading or the warm-up prediction fails, the error is printed and
        `self.model` is left as None.
        """
        super().__init__(player_id)
        if player_id == PLAYER2_PIECE:
            self.opponent_id = PLAYER1_PIECE
        else:
            self.opponent_id = PLAYER2_PIECE
        self.search_depth = search_depth
        try:
            self.model = tf.keras.models.load_model(model_path)
            print(f"CNN Model loaded successfully from {model_path} for Player {player_id}")
            # Run one prediction on an empty board so the model is exercised before play.
            warmup_input = create_board().reshape(1, ROWS, COLS, 1)
            self.model.predict(warmup_input, verbose=0)
            print("Model ready.")
        except Exception as e:
            print(f"Error loading model from {model_path}: {e}")
            print("CNNMinimaxPlayer will not function correctly.")
            self.model = None

    def _evaluate_board_cnn(self, board):
        """
        Score a position with the CNN, from Player 1's point of view.

        Returns output[1] - output[2] of the model (P1-win minus P2-win
        probability), or 0 when no model is loaded.
        """
        if self.model is None:
            return 0

        # The network expects a batch of one board with a trailing channel axis.
        batch = board.reshape(1, ROWS, COLS, 1)
        class_probs = self.model.predict(batch, verbose=0)[0]

        # Index 0 = draw, 1 = Player 1 win, 2 = Player 2 win.
        return class_probs[1] - class_probs[2]

    def _minimax(self, board, depth, maximizing_player, alpha, beta):
        """
        Minimax search with alpha-beta pruning.

        Player 1 is always the maximizing side and Player 2 the minimizing side.

        Returns:
            tuple: (column, score) - Score is from Player 1's perspective.
                The column is None at terminal positions and at depth 0.
        """
        open_columns = get_valid_locations(board)

        if is_terminal_node(board):
            # Adding the remaining depth makes earlier wins score more extreme.
            if winning_move(board, PLAYER1_PIECE):
                return (None, 1000000 + depth)
            if winning_move(board, PLAYER2_PIECE):
                return (None, -1000000 - depth)
            return (None, 0)  # draw

        if depth == 0:
            return (None, self._evaluate_board_cnn(board))

        mover = PLAYER1_PIECE if maximizing_player else PLAYER2_PIECE
        best_score = -math.inf if maximizing_player else math.inf
        best_col = random.choice(open_columns)  # default move

        for col in open_columns:
            row = get_next_open_row(board, col)
            child = board.copy()
            drop_piece(child, row, col, mover)
            _, child_score = self._minimax(child, depth - 1, not maximizing_player, alpha, beta)

            if maximizing_player:
                if child_score > best_score:
                    best_score, best_col = child_score, col
                alpha = max(alpha, best_score)
            else:
                if child_score < best_score:
                    best_score, best_col = child_score, col
                beta = min(beta, best_score)

            # The remaining columns cannot change the result at the parent.
            if alpha >= beta:
                break

        return best_col, best_score

    def _first_winning_column(self, board, columns, piece):
        """Return the first column in `columns` where dropping `piece` wins, else None."""
        for col in columns:
            trial = board.copy()
            row = get_next_open_row(trial, col)
            drop_piece(trial, row, col, piece)
            if winning_move(trial, piece):
                return col
        return None

    def get_move(self, board):
        """
        Pick the AI's column.

        Order of preference: an immediate win, a block of the opponent's
        immediate win, then the Minimax result. Falls back to a random valid
        column when no model is loaded or Minimax returns an unusable column.
        """
        if self.model is None:
            print("AI Error: Model not loaded. Choosing random move.")
            return random.choice(get_valid_locations(board))

        valid_locations = get_valid_locations(board)
        start_time = time.time()

        # 1. Take a win if one is available.
        col = self._first_winning_column(board, valid_locations, self.player_id)
        if col is not None:
            print(f"AI Player {self.player_id}: Found winning move in column {col}")
            return col

        # 2. Otherwise occupy the cell the opponent would win with.
        col = self._first_winning_column(board, valid_locations, self.opponent_id)
        if col is not None:
            print(f"AI Player {self.player_id}: Blocking opponent win in column {col}")
            return col

        # 3. Otherwise search; Player 1 maximizes, Player 2 minimizes.
        print(f"AI Player {self.player_id}: Running Minimax (depth {self.search_depth})...")
        is_maximizer = self.player_id == PLAYER1_PIECE
        col, minimax_score = self._minimax(board, self.search_depth, is_maximizer, -math.inf, math.inf)

        end_time = time.time()
        print(f"AI Player {self.player_id}: Chose column {col} (Score: {minimax_score:.2f}, Time: {end_time - start_time:.2f}s)")

        if col is None or col not in valid_locations:
            print(f"AI Warning: Minimax returned invalid move {col}. Choosing random valid move.")
            col = random.choice(valid_locations)

        return col

print("CNNMinimaxPlayer Loaded.")


CNNMinimaxPlayer Loaded.


## AI Player - Random Player

In [157]:
# Example - Random AI Player
import random

class RandomAIPlayer(Player):
    """Baseline opponent that picks uniformly among the columns that are not full."""

    def get_move(self, board):
        """Return a randomly chosen valid column, after announcing it and pausing briefly."""
        move = random.choice(get_valid_locations(board))
        print(f"Random AI Player {self.player_id}: Chose column {move}")
        # Short pause so the move does not appear instantaneous.
        time.sleep(0.5)
        return move

print("RandomAIPlayer Loaded.")

RandomAIPlayer Loaded.


## AI Player - Slightly Better Random Player

In [159]:
# Slightly Better Random AI Player
import random
import copy

class SlightlyBetterRandomAIPlayer(Player):
    """
    Random player with one-move lookahead. Move priority:
    1. Play a winning move if available.
    2. Block the opponent's winning move if available.
    3. Choose a random valid move otherwise.
    """

    def __init__(self, player_id):
        """
        Store the player's piece and derive the opponent's.

        Args:
            player_id (int): PLAYER1_PIECE or PLAYER2_PIECE.
        """
        super().__init__(player_id)
        if player_id == PLAYER2_PIECE:
            self.opponent_id = PLAYER1_PIECE
        else:
            self.opponent_id = PLAYER2_PIECE
        print(f"SlightlyBetterRandomAIPlayer initialized for Player {self.player_id} (Opponent: {self.opponent_id})")

    def _column_completing_four(self, board, columns, piece):
        """Return the first column in `columns` where dropping `piece` wins, else None."""
        for col in columns:
            trial = board.copy()  # simulate on a copy, never on the live board
            row = get_next_open_row(trial, col)
            if row is None:
                continue
            drop_piece(trial, row, col, piece)
            if winning_move(trial, piece):
                return col
        return None

    def get_move(self, board):
        """
        Choose a column by win, block, or random choice, in that order.

        Args:
            board (np.ndarray): The current 6x7 game board.

        Returns:
            int: The column index (0-6) for the move.
        """
        valid_locations = get_valid_locations(board)

        # Own win first, then the opponent's winning cell.
        for piece in (self.player_id, self.opponent_id):
            col = self._column_completing_four(board, valid_locations, piece)
            if col is not None:
                return col

        return random.choice(valid_locations)

print("SlightlyBetterRandomAIPlayer class Loaded.")



SlightlyBetterRandomAIPlayer class Loaded.


## AI Player - MCTS player

In [161]:
# Monte Carlo Tree Search (MCTS) AI Player with Heuristic Playout & Corrected UCT

import numpy as np
import random
import math
import time
import copy

# Assuming functions from Cell 1 and Player class from Cell 2 are loaded
# from Cell1 import (create_board, print_board, is_valid_location, get_next_open_row,
#                    drop_piece, get_valid_locations, winning_move, is_terminal_node,
#                    PLAYER1_PIECE, PLAYER2_PIECE, EMPTY, ROWS, COLS)
# from Cell2 import Player

# --- Define Positional Heuristic ---
# Static per-cell weights: largest in the centre of the grid, smallest in the
# corners. MCTSPlayer uses them to weight its playout move choice.
POSITIONAL_VALUES_RAW = np.array([
    [3, 4, 5, 7, 5, 4, 3],
    [4, 6, 8, 10, 8, 6, 4],
    [5, 8, 11, 13, 11, 8, 5],
    [5, 8, 11, 13, 11, 8, 5],
    [4, 6, 8, 10, 8, 6, 4],
    [3, 4, 5, 7, 5, 4, 3]
])
# Flipped vertically, presumably to match the board's bottom-up row indexing.
# The table is symmetric top-to-bottom, so the flipped array equals the raw one.
POSITIONAL_VALUES = np.flipud(POSITIONAL_VALUES_RAW)

# --- MCTS Node ---
class MCTSNode:
    """A node in the Monte Carlo search tree.

    Attributes set in __init__:
        state: Board position at this node.
        parent: Parent node, or None for the root.
        move: Column that was played to reach this node from its parent.
        children: Child nodes created so far.
        visits: Number of simulations backed up through this node.
        score: Sum of results seen from `player_at_node`'s side
            (+1 win, -1 loss, 0 draw).
        untried_moves: Valid columns at `state` not yet expanded into children.
        player_at_node: Player whose turn it is at this node.
    """

    def __init__(self, state, parent=None, move=None, player_at_node=None):
        self.state = state
        self.parent = parent
        self.move = move
        self.children = []
        self.visits = 0
        self.score = 0
        self.untried_moves = get_valid_locations(state)
        self.player_at_node = player_at_node

    def uct_select_child(self, exploration_constant=1.414):
        """Pick the child with the highest UCT value as seen by this node's player.

        Args:
            exploration_constant (float): Weight of the exploration term.

        Returns:
            MCTSNode | None: The selected child; None when there are no
            children. If this node has no visits yet, a random child is
            returned because the exploration term needs log(visits).
        """
        if not self.children:
            return None

        if self.visits == 0:
            return random.choice(self.children)

        log_parent_visits = math.log(self.visits)

        def uct_score(child):
            """UCT value of `child` from the parent's perspective."""
            if child.visits == 0:
                # Unvisited children are always tried before visited ones.
                return float('inf')

            # child.score is kept from the child's own player's side, and that
            # player is the parent's opponent, so the sign is inverted.
            parent_perspective_win_rate = -(child.score / child.visits)
            exploration_term = exploration_constant * math.sqrt(log_parent_visits / child.visits)
            return parent_perspective_win_rate + exploration_term

        # max() keeps the first of several equal scores, i.e. the earliest child.
        return max(self.children, key=uct_score)

    def add_child(self, move, state, player_at_new_node):
        """Create a child for `move`, drop the move from untried_moves, and return the child."""
        node = MCTSNode(state=state, parent=self, move=move, player_at_node=player_at_new_node)
        if move in self.untried_moves:
            self.untried_moves.remove(move)
        self.children.append(node)
        return node

    def update(self, result_from_perspective_of_player_at_this_node):
        """Record one simulation: increment visits and add the result to score."""
        self.visits += 1
        self.score += result_from_perspective_of_player_at_this_node


# --- MCTS Player ---
class MCTSPlayer(Player):
    """AI player implementing Monte Carlo Tree Search with heuristic playouts.

    Each call to get_move first looks for an immediate win or block, then runs
    `n_iterations` rounds of selection, expansion, simulation and
    backpropagation, and finally plays the most visited root child.
    """

    def __init__(self, player_id, iterations=1000, exploration_constant=1.414):
        """
        Args:
            player_id (int): PLAYER1_PIECE or PLAYER2_PIECE.
            iterations (int): Number of MCTS iterations per move.
            exploration_constant (float): Exploration weight passed to UCT selection.
        """
        super().__init__(player_id)
        self.opponent_id = PLAYER1_PIECE if player_id == PLAYER2_PIECE else PLAYER2_PIECE
        self.n_iterations = iterations
        self.exploration_constant = exploration_constant
        self.positional_values = POSITIONAL_VALUES
        print(f"MCTSPlayer initialized for Player {self.player_id} ({self.n_iterations} iterations/move, Heuristic Playouts, Corrected UCT)")

    def _other_player(self, player):
        """Return the opponent of `player`."""
        return self.opponent_id if player == self.player_id else self.player_id

    def _find_winning_column(self, board, columns, piece):
        """Return the first column in `columns` where dropping `piece` wins, else None."""
        for col in columns:
            trial = board.copy()
            drop_piece(trial, get_next_open_row(trial, col), col, piece)
            if winning_move(trial, piece):
                return col
        return None

    def _choose_playout_move(self, board, valid_moves, player):
        """Pick a playout move: win, else block, else positional-weighted random."""
        col = self._find_winning_column(board, valid_moves, player)
        if col is not None:
            return col

        col = self._find_winning_column(board, valid_moves, self._other_player(player))
        if col is not None:
            return col

        # Weight each column by the table value of the cell the piece would land in.
        weights = [
            max(0.0, float(self.positional_values[get_next_open_row(board, m)][m]))
            for m in valid_moves
        ]
        if sum(weights) > 0:
            return random.choices(valid_moves, weights=weights, k=1)[0]
        # random.choices rejects an all-zero weight list, so fall back to uniform.
        return random.choice(valid_moves)

    def _select(self, root):
        """Descend by UCT while the node is fully expanded and has children."""
        node = root
        while not node.untried_moves and node.children:
            node = node.uct_select_child(self.exploration_constant)
        return node

    def _expand(self, node):
        """Add one random untried child to `node` and return it.

        Returns `node` unchanged when it has no untried moves or is terminal.
        """
        if not node.untried_moves or is_terminal_node(node.state):
            return node

        move = random.choice(node.untried_moves)
        mover = node.player_at_node
        # untried_moves only holds columns that are open in node.state, so a
        # row is always available here.
        child_state = node.state.copy()
        drop_piece(child_state, get_next_open_row(child_state, move), move, mover)
        return node.add_child(move, child_state, self._other_player(mover))

    def _simulate(self, board, player_to_move):
        """Play the position out to the end with heuristic moves.

        Returns:
            The winning player's id, or None for a draw.
        """
        simulation_board = board.copy()
        sim_player = player_to_move

        while not is_terminal_node(simulation_board):
            # A non-terminal board always has at least one open column.
            valid_moves = get_valid_locations(simulation_board)
            move = self._choose_playout_move(simulation_board, valid_moves, sim_player)
            row = get_next_open_row(simulation_board, move)
            drop_piece(simulation_board, row, move, sim_player)
            sim_player = self._other_player(sim_player)

        # Only the player who moved last can have completed a line.
        last_player = self._other_player(sim_player)
        if winning_move(simulation_board, last_player):
            return last_player
        return None

    def _backpropagate(self, node, winner):
        """Add the playout result to every node from `node` up to the root.

        Each node stores the result from the side of its own player_at_node.
        """
        if winner == self.player_id:
            result_for_self = 1
        elif winner == self.opponent_id:
            result_for_self = -1
        else:
            result_for_self = 0  # draw

        while node is not None:
            if node.player_at_node == self.player_id:
                node.update(result_for_self)
            else:
                node.update(-result_for_self)
            node = node.parent

    def get_move(self, board):
        """Choose a column for the current position.

        Args:
            board (np.ndarray): The current game board; it is not modified.

        Returns:
            int | None: The chosen column index, or None when no column is playable.
        """
        start_time = time.time()
        valid_locations = get_valid_locations(board)

        # Cheap one-move lookahead before any tree search.
        col = self._find_winning_column(board, valid_locations, self.player_id)
        if col is not None:
            print(f"MCTS Player {self.player_id}: Found immediate winning move {col}")
            return col
        col = self._find_winning_column(board, valid_locations, self.opponent_id)
        if col is not None:
            print(f"MCTS Player {self.player_id}: Found immediate block at {col}")
            return col

        root = MCTSNode(state=board.copy(), player_at_node=self.player_id)
        for _ in range(self.n_iterations):
            node = self._select(root)
            node = self._expand(node)
            # Each node stores its own position, so the playout starts from node.state.
            winner = self._simulate(node.state, node.player_at_node)
            self._backpropagate(node, winner)

        if not root.children:
            print(f"MCTS Player {self.player_id}: Warning - No moves explored/possible after {self.n_iterations} iterations. Choosing random.")
            return random.choice(valid_locations) if valid_locations else None

        # The most visited child is the most robust choice.
        best_child = max(root.children, key=lambda c: c.visits)
        best_move = best_child.move

        end_time = time.time()
        # The child's score is kept from the opponent's side; negate it for our win rate.
        parent_win_rate_for_best_child = (-best_child.score / best_child.visits) if best_child.visits > 0 else 0.0
        win_rate_display = (parent_win_rate_for_best_child + 1) / 2 * 100  # -1..+1 -> 0..100%

        print(f"MCTS Player {self.player_id}: Chose column {best_move} "
              f"({best_child.visits} visits, "
              f"~WinRate: {win_rate_display:.1f}%, "
              f"Time: {end_time - start_time:.2f}s)")

        # Defensive check: never return a column the board cannot accept.
        if best_move not in valid_locations:
            print(f"MCTS Warning: Chosen best move {best_move} is invalid! Fallback to random.")
            return random.choice(valid_locations) if valid_locations else None

        return best_move


print("MCTSPlayer class Updated with Heuristic Playouts and Corrected UCT.")


MCTSPlayer class Updated with Heuristic Playouts and Corrected UCT.


## Q-Learning Agent Class

In [163]:
# Q-Learning Agent Class

import numpy as np
import random
import pickle
import os
import time

# --- Positional Values ---
# Static per-square weights (6 rows x 7 columns): the centre squares carry the
# largest values and the corners the smallest.
POSITIONAL_VALUES_RAW = np.array([
    [3, 4,  5,  7,  5, 4, 3],
    [4, 6,  8, 10,  8, 6, 4],
    [5, 8, 11, 13, 11, 8, 5],
    [5, 8, 11, 13, 11, 8, 5],
    [4, 6,  8, 10,  8, 6, 4],
    [3, 4,  5,  7,  5, 4, 3],
])
# Reverse the row order (a view, no copy) so the table is indexed like the board,
# whose row 0 is the bottom row.
POSITIONAL_VALUES = POSITIONAL_VALUES_RAW[::-1]

class QLearningAgent(Player):
    """A tabular Q-learning agent for Connect 4.

    Q-values live in ``self.q_table``, a dict keyed by ``(state_tuple, action)`` where
    ``action`` is a column index. Boards are keyed in a canonical "this agent is
    Player 1" form: when the agent plays as Player 2 the two piece ids are swapped
    before the board is turned into a key. The same canonical form is used for
    lookups (``choose_action``), for the remembered state (``get_move``) and for the
    bootstrap state (``learn``), so reads and writes always hit the same keys.

    Attributes:
        q_table: dict ``(state_tuple, action) -> float``; missing entries read as 0.0.
        lr: learning rate of the TD update.
        gamma: discount factor of the TD update.
        epsilon, epsilon_decay, min_epsilon: epsilon-greedy exploration schedule.
        previous_state_tuple, previous_action: the (state, action) pair most recently
            chosen by ``get_move`` while learning; this is the entry ``learn`` updates.
        is_learning: when False the agent never explores and never updates the table.
        verbose: when True ``get_move`` prints a one-line summary of each decision.
    """

    def __init__(self, player_id, learning_rate=0.1, discount_factor=0.9,
                 exploration_rate=1.0, exploration_decay=0.9995, min_exploration_rate=0.01):
        super().__init__(player_id)
        self.opponent_id = PLAYER1_PIECE if player_id == PLAYER2_PIECE else PLAYER2_PIECE
        self.q_table = {}
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = exploration_rate
        self.epsilon_decay = exploration_decay
        self.min_epsilon = min_exploration_rate
        self.previous_state_tuple = None
        self.previous_action = None
        self.is_learning = True
        self.verbose = False

    def _state_to_tuple(self, board):
        """Converts a 2-D board into a hashable tuple of row tuples (usable as a dict key)."""
        return tuple(map(tuple, board))

    def _flip_state(self, board):
        """Returns a copy of ``board`` with Player 1 and Player 2 pieces swapped."""
        flipped_board = board.copy()
        # Both masks are computed before either assignment so swapped cells are not swapped back.
        p1_mask = flipped_board == PLAYER1_PIECE
        p2_mask = flipped_board == PLAYER2_PIECE
        flipped_board[p1_mask] = PLAYER2_PIECE
        flipped_board[p2_mask] = PLAYER1_PIECE
        return flipped_board

    def _canonical_board(self, board):
        """Returns the board in the Player-1 form used for Q-table keys.

        The board is returned unchanged when this agent is Player 1 and with the
        piece ids swapped otherwise.
        """
        if self.player_id == PLAYER1_PIECE:
            return board
        return self._flip_state(board)

    def get_q_value(self, state_tuple, action):
        """Returns the stored Q-value for ``(state_tuple, action)``, or 0.0 if unseen."""
        return self.q_table.get((state_tuple, action), 0.0)

    def choose_action(self, board):
        """Picks a column with an epsilon-greedy policy.

        Args:
            board: the current board from the real game's point of view.

        Returns:
            ``(action, details)``. ``action`` is a column index, or None when no column
            is playable. ``details`` is a dict with keys ``max_q``, ``chosen_q``,
            ``tie_break_used`` and ``exploited`` describing how the choice was made.
        """
        state_tuple = self._state_to_tuple(self._canonical_board(board))
        valid_actions = get_valid_locations(board)
        details = {'max_q': None, 'chosen_q': None, 'tie_break_used': False, 'exploited': False}
        if not valid_actions:
            return None, details

        # Explore: only while learning, with probability epsilon.
        if self.is_learning and random.uniform(0, 1) < self.epsilon:
            chosen_action = random.choice(valid_actions)
            details['chosen_q'] = self.get_q_value(state_tuple, chosen_action)
            return chosen_action, details

        # Exploit: take the best known action.
        details['exploited'] = True
        q_values = {action: self.get_q_value(state_tuple, action) for action in valid_actions}
        max_q = max(q_values.values())
        details['max_q'] = max_q
        tolerance = 1e-9
        best_actions = [action for action, q in q_values.items() if abs(q - max_q) < tolerance]

        if len(best_actions) == 1:
            chosen_action = best_actions[0]
        elif best_actions:
            # Several actions share the best Q-value: prefer the square with the highest
            # positional weight, and pick at random among squares that are still tied.
            details['tie_break_used'] = True
            best_heuristic_value = -float('inf')
            tied_heuristic_actions = []
            for action in best_actions:
                row = get_next_open_row(board, action)
                if row is None:
                    continue
                heuristic_value = POSITIONAL_VALUES[row][action]
                if heuristic_value > best_heuristic_value:
                    best_heuristic_value = heuristic_value
                    tied_heuristic_actions = [action]
                elif heuristic_value == best_heuristic_value:
                    tied_heuristic_actions.append(action)
            chosen_action = random.choice(tied_heuristic_actions or best_actions)
        else:
            # Only reachable when no Q-value compares within tolerance of the maximum
            # (e.g. non-finite values); fall back to any playable column.
            chosen_action = random.choice(valid_actions)

        details['chosen_q'] = q_values.get(chosen_action, 0.0)
        return chosen_action, details

    def get_move(self, board):
        """Gets the agent's move. Prints details ONLY if self.verbose is True.

        While ``is_learning`` is True the chosen (state, action) pair is remembered so
        that a later call to ``learn`` can update it.

        Returns:
            The chosen column index, or None when no column is playable.
        """
        start_time = time.time()
        chosen_action, details = self.choose_action(board.copy())
        elapsed_time = time.time() - start_time

        if chosen_action is None:
            print(f"Q-Agent {self.player_id}: No valid moves available.")
            return None

        if self.verbose:
            max_q_str = f"{details['max_q']:.4f}" if details['max_q'] is not None else "N/A"
            chosen_q_str = f"{details['chosen_q']:.4f}" if details['chosen_q'] is not None else "N/A"
            mode = "Exploit" if details['exploited'] else "Explore"
            tie_info = "(TIEBREAK)" if details['tie_break_used'] else ""

            print(f"Q-Agent {self.player_id}: Chose column {chosen_action} "
                  f"({mode}{tie_info}, "
                  f"ChosenQ: {chosen_q_str}, MaxQ: {max_q_str}, "
                  f"Time: {elapsed_time:.3f}s)")

        # Remember the state in the SAME canonical form that choose_action/learn use
        # for lookups; otherwise a Player-2 learner would write keys it never reads.
        if self.is_learning:
            self.previous_state_tuple = self._state_to_tuple(self._canonical_board(board))
            self.previous_action = chosen_action

        return chosen_action

    def learn(self, reward, next_board):
        """Applies one Q-learning update to the remembered (state, action) pair.

        ``Q(s, a) += lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))`` where ``s'`` is
        ``next_board``. The future term is 0 when ``next_board`` is terminal or has no
        playable column. Does nothing when learning is disabled or no move has been
        remembered yet.
        """
        if not self.is_learning or self.previous_state_tuple is None or self.previous_action is None:
            return
        old_q = self.get_q_value(self.previous_state_tuple, self.previous_action)
        next_state_tuple = self._state_to_tuple(self._canonical_board(next_board))
        valid_next_actions = get_valid_locations(next_board)
        max_future_q = 0.0
        if not is_terminal_node(next_board) and valid_next_actions:
            max_future_q = max(self.get_q_value(next_state_tuple, action)
                               for action in valid_next_actions)
        new_q = old_q + self.lr * (reward + self.gamma * max_future_q - old_q)
        self.q_table[(self.previous_state_tuple, self.previous_action)] = new_q

    def update_epsilon(self):
        """Decays epsilon by ``epsilon_decay`` (never below ``min_epsilon``) while learning."""
        if self.is_learning:
            self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

    def save_q_table(self, filepath="q_table.pkl"):
        """Pickles the Q-table to ``filepath``.

        The table is first written to ``filepath + ".tmp"`` and then moved into place
        with ``os.replace``, so a failed dump does not overwrite an existing file.
        Errors are reported on stdout and are not re-raised.
        """
        temp_filepath = filepath + ".tmp"
        final_filepath = filepath
        try:
            with open(temp_filepath, 'wb') as f:
                pickle.dump(self.q_table, f)
            os.replace(temp_filepath, final_filepath)
            print(f"Q-table saved successfully to {final_filepath} ({len(self.q_table)} entries)")
        except Exception as e:
            # Include the exception type: some exceptions (e.g. MemoryError) have an
            # empty message, which would otherwise leave this line uninformative.
            print(f"Error saving Q-table: {type(e).__name__}: {e}")
            if os.path.exists(temp_filepath):
                try:
                    os.remove(temp_filepath)
                except Exception as e_rem:
                    print(f"Error removing temp file: {e_rem}")

    def load_q_table(self, filepath="q_table.pkl"):
        """Replaces the Q-table with the one pickled at ``filepath``.

        A missing, corrupted or unreadable file leaves the agent with an empty table;
        the reason is printed and no exception is raised.

        SECURITY: ``pickle.load`` can execute arbitrary code from the file. Only load
        Q-tables that were produced by a trusted run of this notebook.
        """
        try:
            if os.path.exists(filepath):
                with open(filepath, 'rb') as f:
                    self.q_table = pickle.load(f)
                print(f"Q-table loaded successfully from {filepath} ({len(self.q_table)} entries)")
            else:
                print(f"Q-table file not found at {filepath}. Starting fresh.")
                self.q_table = {}
        except (pickle.UnpicklingError, EOFError, ValueError) as e_load:
            print(f"Error loading Q-table from {filepath}: File might be corrupted or incompatible. {e_load}")
            print("Starting with an empty Q-table.")
            self.q_table = {}
        except Exception as e:
            print(f"An unexpected error occurred loading Q-table: {e}")
            self.q_table = {}


# Cell-load confirmation: emitted once, after the class above has been defined.
print("QLearningAgent class updated with Verbose Flag for printing control.")

QLearningAgent class updated with Verbose Flag for printing control.


## Training Loop for Q-Learning Agent

In [134]:
# Training Loop for Q-Learning Agent
#
# Trains `q_agent` (Player 1) against a scripted opponent (Player 2), printing
# rolling statistics every PRINT_EVERY episodes and checkpointing the Q-table
# every SAVE_EVERY episodes.

# --- Training Configuration ---
N_EPISODES = 100000
PRINT_EVERY = 5000
SAVE_EVERY = 10000

# Rewards
WIN_REWARD = 10.0
LOSS_REWARD = -10.0
DRAW_REWARD = -1.0
STEP_REWARD = 0.0  # base reward for a non-terminal move (-0.01 was tried as an alternative)

# --- Heuristic Reward Shaping ---
HEURISTIC_WEIGHT = 0.01 # Weight for the positional value bonus. We can try different values here (0.005, 0.01, 0.02)

Q_TABLE_FILE = "connect4_q_agent_heuristic.pkl"

# --- Opponent ---
opponent_player_class = SlightlyBetterRandomAIPlayer

# --- Initialize Agent ---
# NOTE(review): a table resumed from disk still starts with the constructor's
# default epsilon (1.0), i.e. fully random play at first — confirm this is intended.
q_agent = QLearningAgent(player_id=PLAYER1_PIECE)
q_agent.load_q_table(Q_TABLE_FILE)
q_agent.is_learning = True

opponent = opponent_player_class(PLAYER2_PIECE)

# --- Training Statistics ---
win_count = 0
loss_count = 0
draw_count = 0
recent_outcomes = []

print(f"\n--- Starting Q-Learning Training with Heuristics ({N_EPISODES} episodes) ---")
print(f"Agent: {type(q_agent).__name__} (P1) vs Opponent: {type(opponent).__name__} (P2)")
print(f"Heuristic Weight: {HEURISTIC_WEIGHT}")
print(f"Initial Epsilon: {q_agent.epsilon:.4f}, Q-table size: {len(q_agent.q_table)}")

start_total_time = time.time()

for episode in range(1, N_EPISODES + 1):
    board = create_board()
    game_over = False
    turn = 0  # 0 = agent (P1), 1 = opponent (P2)
    # Per-episode state: without this, an episode aborted by an invalid move would
    # re-append the previous episode's outcome (or hit an unbound name on episode 1).
    outcome = None
    # Never carry the last (state, action) of one game into the next.
    q_agent.previous_state_tuple = None
    q_agent.previous_action = None

    while not game_over:
        current_player_obj = q_agent if turn == 0 else opponent
        player_piece = PLAYER1_PIECE if turn == 0 else PLAYER2_PIECE

        # --- Get Move ---
        col = current_player_obj.get_move(board.copy())

        if col is None or not is_valid_location(board, col):
             print(f"Episode {episode}: Invalid move {col} by Player {player_piece}. Ending episode.")
             game_over = True
             # Doubled loss penalty when the agent itself produced the invalid move.
             if turn == 0: q_agent.learn(LOSS_REWARD * 2, board)
             break

        row = get_next_open_row(board, col)
        drop_piece(board, row, col, player_piece)
        next_board_state = board.copy()

        # --- Determine Reward ---
        step_reward_value = STEP_REWARD # Start with base step reward
        final_reward = None # Will be set if game ends

        if winning_move(board, player_piece):
            game_over = True
            if player_piece == q_agent.player_id:
                final_reward = WIN_REWARD
                win_count += 1
                outcome = 'W'
            else: # Opponent won
                final_reward = LOSS_REWARD
                loss_count += 1
                outcome = 'L'

        elif len(get_valid_locations(board)) == 0: # Draw
            game_over = True
            final_reward = DRAW_REWARD
            draw_count += 1
            outcome = 'D'

        elif turn == 0:
            # Game continues and the agent just moved: add the positional bonus
            # of the square it occupied (reward shaping).
            positional_bonus = POSITIONAL_VALUES[row][col] * HEURISTIC_WEIGHT
            step_reward_value += positional_bonus

        # --- Agent Learning Step ---
        # Runs after EVERY move (agent's or opponent's) and always updates the agent's
        # most recent (state, action): the opponent's reply is what delivers a loss.
        if q_agent.previous_state_tuple:
             learn_reward = final_reward if game_over else step_reward_value
             q_agent.learn(learn_reward, next_board_state) # next_board_state is the result of the current move

        # --- Switch Turn ---
        if not game_over:
            turn = 1 - turn

    # --- End of Episode ---
    q_agent.update_epsilon()
    if outcome: recent_outcomes.append(outcome)

    # --- Print Progress & Save ---
    if episode % PRINT_EVERY == 0:
        end_time = time.time()
        elapsed = end_time - start_total_time
        avg_time = elapsed / episode if episode > 0 else 0
        recent_window = recent_outcomes[-PRINT_EVERY:]
        recent_wins = recent_window.count('W')
        recent_losses = recent_window.count('L')
        recent_draws = recent_window.count('D')
        recent_total = len(recent_window)
        win_rate = recent_wins / recent_total * 100 if recent_total > 0 else 0
        print(f"Ep: {episode}/{N_EPISODES} | "
              f"Eps: {q_agent.epsilon:.4f} | "
              f"Q-Size: {len(q_agent.q_table)} | "
              f"Last {PRINT_EVERY}: W:{recent_wins}({win_rate:.1f}%) L:{recent_losses} D:{recent_draws} | "
              f"Avg Time/Ep: {avg_time:.3f}s")
    if episode % SAVE_EVERY == 0:
         q_agent.save_q_table(Q_TABLE_FILE)


# --- End of Training ---
total_time = time.time() - start_total_time
print("\n--- Training Finished ---")
print(f"Total time: {total_time:.2f} seconds")
print(f"Final Epsilon: {q_agent.epsilon:.4f}")
print(f"Final Q-table size: {len(q_agent.q_table)}")
print(f"Overall Stats: Wins: {win_count}, Losses: {loss_count}, Draws: {draw_count}")
q_agent.save_q_table(Q_TABLE_FILE)
# Switch to evaluation mode: no further updates and no exploration.
q_agent.is_learning = False
q_agent.epsilon = 0
print("\nAgent set to evaluation mode.")

Q-table loaded successfully from connect4_q_agent_heuristic.pkl (307021 entries)
SlightlyBetterRandomAIPlayer initialized for Player 2 (Opponent: 1)

--- Starting Q-Learning Training with Heuristics (100000 episodes) ---
Agent: QLearningAgent (P1) vs Opponent: SlightlyBetterRandomAIPlayer (P2)
Heuristic Weight: 0.01
Initial Epsilon: 1.0000, Q-table size: 307021
Ep: 5000/100000 | Eps: 0.0820 | Q-Size: 335345 | Last 5000: W:1352(27.0%) L:3608 D:40 | Avg Time/Ep: 0.018s
Ep: 10000/100000 | Eps: 0.0100 | Q-Size: 350094 | Last 5000: W:2662(53.2%) L:2284 D:54 | Avg Time/Ep: 0.018s
Error saving Q-table: 
Ep: 15000/100000 | Eps: 0.0100 | Q-Size: 362208 | Last 5000: W:2969(59.4%) L:1943 D:88 | Avg Time/Ep: 0.020s
Ep: 20000/100000 | Eps: 0.0100 | Q-Size: 373991 | Last 5000: W:2971(59.4%) L:1936 D:93 | Avg Time/Ep: 0.019s
Error saving Q-table: 
Ep: 25000/100000 | Eps: 0.0100 | Q-Size: 385876 | Last 5000: W:2965(59.3%) L:1950 D:85 | Avg Time/Ep: 0.021s
Ep: 30000/100000 | Eps: 0.0100 | Q-Size: 39762

## Training Loop for Q-Learning Agent - gen 2

In [136]:
# Q-Learning Self-Play Training Loop
#
# Trains a fresh QLearningAgent (Player 1) against a frozen QLearningAgent
# (Player 2) that plays from the previously saved Q-table, and saves the new
# agent's table to a separate file.

import numpy as np
import random
import pickle
import os
import time


# --- Training Configuration ---
N_EPISODES = 100000
PRINT_EVERY = 5000
SAVE_EVERY = 10000

# Rewards (Keep consistent unless you have a reason to change)
WIN_REWARD = 10.0
LOSS_REWARD = -10.0
DRAW_REWARD = -1.0
STEP_REWARD = 0.0

# Heuristic Weight (Keep or adjust for the agent being trained)
HEURISTIC_WEIGHT = 0.01

# --- File Paths ---
EXISTING_Q_TABLE_FILE = "connect4_q_agent_heuristic.pkl" # Load previous agent from here
NEW_Q_TABLE_FILE = "connect4_q_agent_selfplay_gen2.pkl" # Save NEW agent here

# --- Initialize Agents ---
players_ready = True
q_agent_opponent = None # The fixed opponent (loads existing table)
q_agent_trainer = None  # The agent being trained (starts fresh or from existing)

# 1. Initialize Opponent Agent (Fixed Policy)
try:
    print(f"Loading FIXED Opponent (Player 2) policy from: {EXISTING_Q_TABLE_FILE}")
    q_agent_opponent = QLearningAgent(player_id=PLAYER2_PIECE) # Assign as P2

    # Load the previously trained Q-table
    q_agent_opponent.load_q_table(EXISTING_Q_TABLE_FILE)
    if not q_agent_opponent.q_table:
         # Training still proceeds; the opponent then has no learned Q-values to play from.
         print(f"WARNING: Opponent Q-table file '{EXISTING_Q_TABLE_FILE}' not found or loaded empty.")
    else:
        print(f"Opponent Q-table loaded ({len(q_agent_opponent.q_table)} entries).")

    # CRITICAL: Set opponent to EVALUATION mode
    q_agent_opponent.is_learning = False
    q_agent_opponent.epsilon = 0.0
    print("Opponent agent set to evaluation mode (no learning, no exploration).")

except NameError:
    print("\nERROR: 'QLearningAgent' class not defined.")
    players_ready = False
except FileNotFoundError:
    print(f"\nERROR: Existing Q-table file '{EXISTING_Q_TABLE_FILE}' not found.")
    players_ready = False
except Exception as e:
    print(f"\nAn unexpected error occurred setting up the Opponent Agent: {e}")
    players_ready = False


# 2. Initialize Agent to be Trained (Self-Play)
if players_ready:
    try:
        print("\nInitializing NEW Agent (Player 1) for self-play training...")
        # Start training from scratch (empty Q-table)
        q_agent_trainer = QLearningAgent(
            player_id=PLAYER1_PIECE,
            learning_rate=0.1,
            discount_factor=0.99,
            exploration_rate=1.0,
            exploration_decay=0.9999,
            min_exploration_rate=0.05
        )

        # Ensure trainer is set to LEARNING mode
        q_agent_trainer.is_learning = True
        print(f"Trainer agent (P1) initialized (Gamma: {q_agent_trainer.gamma}, Epsilon Decay: {q_agent_trainer.epsilon_decay}, Min Epsilon: {q_agent_trainer.min_epsilon}).")

    except NameError:
        print("\nERROR: 'QLearningAgent' class not defined.")
        players_ready = False
    except Exception as e:
        print(f"\nAn unexpected error occurred setting up the Trainer Agent: {e}")
        players_ready = False

# --- Training Loop ---
if players_ready and q_agent_trainer and q_agent_opponent:
    win_count = 0
    loss_count = 0
    draw_count = 0
    recent_outcomes = []

    print(f"\n--- Starting Q-Learning Self-Play Training ({N_EPISODES} episodes) ---")
    print(f"Agent: QLearningAgent (Trainer, P1) vs Opponent: QLearningAgent (Fixed, P2)")
    print(f"Saving trained agent to: {NEW_Q_TABLE_FILE}")
    print(f"Initial Epsilon: {q_agent_trainer.epsilon:.4f}, Initial Q-table size: {len(q_agent_trainer.q_table)}")

    start_total_time = time.time()

    for episode in range(1, N_EPISODES + 1):
        board = create_board()
        game_over = False
        turn = 0 # 0 for Trainer (P1), 1 for Opponent (P2)
        # Set once per episode so the end-of-episode bookkeeping never reads an
        # unbound or stale value when the game is aborted early.
        outcome = None

        # Reset the trainer's previous state/action at the start of each episode
        q_agent_trainer.previous_state_tuple = None
        q_agent_trainer.previous_action = None

        while not game_over:
            # Determine current player object
            current_player_obj = q_agent_trainer if turn == 0 else q_agent_opponent
            player_piece = PLAYER1_PIECE if turn == 0 else PLAYER2_PIECE

            # --- Get Move ---
            # Opponent uses its fixed policy; Trainer uses its learning policy
            col = current_player_obj.get_move(board.copy())

            if col is None or not is_valid_location(board, col):
                 print(f"Episode {episode}: Invalid move {col} by Player {player_piece} ({type(current_player_obj).__name__}). Ending.")
                 if turn == 0:
                      # Penalize the trainer heavily when it has a remembered move to blame...
                      if q_agent_trainer.previous_state_tuple and q_agent_trainer.previous_action is not None:
                           q_agent_trainer.learn(LOSS_REWARD * 2, board)
                      # ...and always count the aborted game as a loss for the trainer.
                      loss_count += 1
                      outcome = 'L'
                 game_over = True
                 break # End episode

            row = get_next_open_row(board, col)
            drop_piece(board, row, col, player_piece)
            next_board_state = board.copy()

            # --- Determine Reward ---
            step_reward_value = STEP_REWARD
            final_reward = None

            if winning_move(board, player_piece):
                game_over = True
                if player_piece == q_agent_trainer.player_id: # Trainer won
                    final_reward = WIN_REWARD
                    win_count += 1
                    outcome = 'W'
                else: # Opponent won (Trainer lost)
                    final_reward = LOSS_REWARD
                    loss_count += 1
                    outcome = 'L'
            elif len(get_valid_locations(board)) == 0: # Draw
                game_over = True
                final_reward = DRAW_REWARD
                draw_count += 1
                outcome = 'D'
            elif turn == 0:
                # Game continues and the trainer just moved: add the positional
                # bonus of the square it occupied (reward shaping).
                positional_bonus = POSITIONAL_VALUES[row][col] * HEURISTIC_WEIGHT
                step_reward_value += positional_bonus

            # Learn after EVERY move (trainer's or opponent's); the update always targets
            # the trainer's most recent (state, action) pair.
            if q_agent_trainer.previous_state_tuple and q_agent_trainer.previous_action is not None:
                 # If game ended, use final reward. Otherwise, use step reward.
                 learn_reward = final_reward if game_over else step_reward_value
                 q_agent_trainer.learn(learn_reward, next_board_state)

            # --- Switch Turn ---
            if not game_over:
                turn = 1 - turn

        # --- End of Episode ---
        q_agent_trainer.update_epsilon() # Decay trainer's epsilon
        if outcome: recent_outcomes.append(outcome)

        # --- Print Progress & Save ---
        if episode % PRINT_EVERY == 0:
            end_time = time.time()
            elapsed = end_time - start_total_time
            avg_time = (elapsed / episode) if episode > 0 else 0

            recent_window = recent_outcomes[-PRINT_EVERY:]
            recent_wins = recent_window.count('W')
            recent_losses = recent_window.count('L')
            recent_draws = recent_window.count('D')
            recent_total = len(recent_window)
            win_rate = recent_wins / recent_total * 100 if recent_total > 0 else 0

            print(f"Ep: {episode}/{N_EPISODES} | "
                  f"Eps: {q_agent_trainer.epsilon:.4f} | "
                  f"Q-Size: {len(q_agent_trainer.q_table)} | "
                  f"Last {PRINT_EVERY}: W:{recent_wins}({win_rate:.1f}%) L:{recent_losses} D:{recent_draws} | "
                  f"Avg Time/Ep: {avg_time:.3f}s")

        if episode % SAVE_EVERY == 0 or episode == N_EPISODES: # Save also on last episode
             # Save the TRAINER's Q-table to the NEW file
             q_agent_trainer.save_q_table(NEW_Q_TABLE_FILE)


    # --- End of Training ---
    total_time = time.time() - start_total_time
    print("\n--- Self-Play Training Finished ---")
    print(f"Total time: {total_time:.2f} seconds")
    print(f"Final Epsilon: {q_agent_trainer.epsilon:.4f}")
    print(f"Final Q-table size: {len(q_agent_trainer.q_table)}")
    print(f"Overall Stats: Wins: {win_count}, Losses: {loss_count}, Draws: {draw_count}")
    # Final save is handled in the loop now
    q_agent_trainer.is_learning = False
    q_agent_trainer.epsilon = 0 # Set trainer to eval mode after training
    print(f"\nTrainer agent saved to {NEW_Q_TABLE_FILE} and set to evaluation mode.")

else:
     print("\nAgent initialization failed. Cannot start training.")

Loading FIXED Opponent (Player 2) policy from: connect4_q_agent_heuristic.pkl
Q-table loaded successfully from connect4_q_agent_heuristic.pkl (307021 entries)
Opponent Q-table loaded (307021 entries).
Opponent agent set to evaluation mode (no learning, no exploration).

Initializing NEW Agent (Player 1) for self-play training...
Trainer agent (P1) initialized (Gamma: 0.99, Epsilon Decay: 0.9999, Min Epsilon: 0.05).

--- Starting Q-Learning Self-Play Training (100000 episodes) ---
Agent: QLearningAgent (Trainer, P1) vs Opponent: QLearningAgent (Fixed, P2)
Saving trained agent to: connect4_q_agent_selfplay_gen2.pkl
Initial Epsilon: 1.0000, Initial Q-table size: 0
Ep: 5000/100000 | Eps: 0.6065 | Q-Size: 14230 | Last 5000: W:1810(36.2%) L:3189 D:1 | Avg Time/Ep: 0.004s
Ep: 10000/100000 | Eps: 0.3679 | Q-Size: 20968 | Last 5000: W:3192(63.8%) L:1805 D:3 | Avg Time/Ep: 0.003s
Q-table saved successfully to connect4_q_agent_selfplay_gen2.pkl (20968 entries)
Ep: 15000/100000 | Eps: 0.2231 | Q-S

## AI Player - Hybrid MCTS Q-agent

In [179]:
# Hybrid MCTS + Q-Agent Player

import numpy as np
import random
import math
import time
import copy
import pickle
import os # Added os for path checks if needed later




# --- MCTS Node ---
class MCTSNode:
    """One position in the Monte Carlo search tree, together with its statistics.

    ``score`` is accumulated from the point of view of ``player_at_node`` (the
    player whose turn it is at this node); ``move`` is the column that led here
    from ``parent``.
    """

    def __init__(self, state, parent=None, move=None, player_at_node=None):
        self.state = state
        self.parent = parent
        self.move = move
        self.player_at_node = player_at_node
        self.children = []
        self.visits = 0
        self.score = 0
        # Columns that are playable in this position and have no child node yet.
        self.untried_moves = get_valid_locations(state)

    def uct_select_child(self, exploration_constant=1.414):
        """Returns the child with the highest UCT value as seen by this (parent) node.

        Returns None when there are no children, and a random child when this node
        has not been visited yet. An unvisited child always outranks visited ones;
        ties go to the child that was added first.
        """
        if not self.children:
            return None
        if self.visits == 0:
            return random.choice(self.children)

        log_parent_visits = math.log(self.visits)
        chosen = None
        chosen_value = None
        for child in self.children:
            if child.visits == 0:
                value = float('inf')
            else:
                # The child's score is from the child's player's perspective, so
                # negate it to get the value for the player choosing here.
                exploitation = -(child.score / child.visits)
                exploration = exploration_constant * math.sqrt(log_parent_visits / child.visits)
                value = exploitation + exploration
            # Strict '>' keeps the earliest child on ties.
            if chosen is None or value > chosen_value:
                chosen = child
                chosen_value = value
        return chosen

    def add_child(self, move, state, player_at_new_node):
        """Creates a child reached by ``move``, registers it and returns it."""
        child = MCTSNode(state=state, parent=self, move=move, player_at_node=player_at_new_node)
        # The move is normally still listed as untried; tolerate it being absent.
        if move in self.untried_moves:
            self.untried_moves.remove(move)
        self.children.append(child)
        return child

    def update(self, result_from_perspective_of_player_at_this_node):
        """Records one more visit and adds the playout result to the running score."""
        self.visits += 1
        self.score += result_from_perspective_of_player_at_this_node



# --- Hybrid Player ---
class MCTS_QAgent_Hybrid(Player):
    """MCTS player whose playouts are steered by a pre-trained Q-table.

    The tree search (selection / expansion / backpropagation) is plain UCT over
    ``MCTSNode``; only the simulation step differs: moves during a playout are
    chosen greedily by an internal ``QLearningAgent`` instead of at random.
    """

    def __init__(self, player_id, q_table_path, iterations=1000, exploration_constant=1.414):
        """
        Args:
            player_id: piece id this player uses on the board.
            q_table_path: pickle file with the Q-table used to steer playouts.
            iterations: number of MCTS iterations run per call to ``get_move``.
            exploration_constant: UCT exploration weight.

        Raises:
            Whatever building the internal agent or loading the table raises;
            the error is printed and then re-raised.
        """
        super().__init__(player_id)
        self.opponent_id = PLAYER1_PIECE if player_id == PLAYER2_PIECE else PLAYER2_PIECE
        self.n_iterations = iterations
        self.exploration_constant = exploration_constant

        # --- Load the Q-Agent for internal use ---
        self.q_agent = None
        print(f"Initializing Hybrid Agent (Player {player_id})")
        try:
            # The internal agent always plays "as Player 1"; boards are flipped for it
            # in _simulate_with_q_agent whenever Player 2 is to move.
            self.q_agent = QLearningAgent(player_id=PLAYER1_PIECE)
            self.q_agent.load_q_table(q_table_path)
            if not self.q_agent.q_table:
                print(f"WARNING: Q-table file '{q_table_path}' loaded empty for Hybrid agent.")
            # Evaluation mode: no table updates, no exploration, no per-move printing.
            self.q_agent.is_learning = False
            self.q_agent.epsilon = 0.0
            self.q_agent.verbose = False
            print(f"Internal Q-Agent loaded ({len(self.q_agent.q_table)} entries) for Hybrid Player {player_id}.")
        except NameError:
            print("ERROR: QLearningAgent class not defined. Cannot create Hybrid Agent.")
            raise
        except FileNotFoundError:
            print(f"ERROR: Q-table file '{q_table_path}' not found for Hybrid Agent.")
            raise
        except Exception as e:
            print(f"ERROR loading Q-table for Hybrid Agent: {e}")
            raise

    def _simulate_with_q_agent(self, board_state, starting_player):
        """Plays one game to the end from ``board_state`` using the Q-agent for both sides.

        Args:
            board_state: position to start from (not modified; a copy is played on).
            starting_player: piece id of the player to move first.

        Returns:
            +1 if this player wins the playout, -1 if the opponent wins, 0 otherwise.
            An exception during the playout is printed and also reported as -1.
        """
        simulation_board = board_state.copy()
        sim_player = starting_player

        try:
            while not is_terminal_node(simulation_board):
                valid_moves = get_valid_locations(simulation_board)
                if not valid_moves:
                    break

                # Show the Q-agent the position as if the side to move were Player 1.
                if sim_player == PLAYER1_PIECE:
                    board_for_q_eval = simulation_board
                else:
                    board_for_q_eval = self.q_agent._flip_state(simulation_board)

                chosen_move, _ = self.q_agent.choose_action(board_for_q_eval)
                if chosen_move is None or chosen_move not in valid_moves:
                    chosen_move = random.choice(valid_moves)

                row = get_next_open_row(simulation_board, chosen_move)
                if row is None:
                    print(f"Warning: Q-Sim chose invalid move {chosen_move} - row is None.")
                    break

                drop_piece(simulation_board, row, chosen_move, sim_player)
                sim_player = PLAYER2_PIECE if sim_player == PLAYER1_PIECE else PLAYER1_PIECE

        except Exception as sim_e:
            print(f"\n!!! ERROR during simulation !!!")
            print(f"Player whose turn it was: {sim_player}")
            print(f"Board state where error occurred:\n{simulation_board}")
            print(f"Error details: {type(sim_e)} - {sim_e}")
            # NOTE(review): a failed playout is scored as a loss for this player;
            # confirm that this is preferred over a neutral 0.
            return -1

        # --- Determine simulation result ---
        # sim_player is the side that would move next, so the other side moved last
        # and is the only one that can have just completed a line.
        last_player = PLAYER2_PIECE if sim_player == PLAYER1_PIECE else PLAYER1_PIECE
        try:
            last_player_won = winning_move(simulation_board, last_player)
        except Exception as win_e:
            print(f"\n!!! ERROR during final win check !!!")
            print(f"Board state:\n{simulation_board}")
            print(f"Error details: {type(win_e)} - {win_e}")
            last_player_won = False # Assume no win on error

        if last_player_won:
            if last_player == self.player_id:
                return 1
            if last_player == self.opponent_id:
                return -1
        return 0 # Draw / no winner

    def _find_winning_column(self, board, columns, piece):
        """Returns the first column in ``columns`` where dropping ``piece`` wins at once, else None."""
        for col in columns:
            row = get_next_open_row(board, col)
            if row is None:
                continue
            trial_board = board.copy()
            drop_piece(trial_board, row, col, piece)
            if winning_move(trial_board, piece):
                return col
        return None

    def get_move(self, board):
        """Determines the move using MCTS guided by Q-Agent simulations.

        An immediate winning move is played without searching; failing that, an
        immediate winning move of the opponent is blocked. Otherwise
        ``n_iterations`` MCTS iterations are run and the most-visited root child wins.

        Returns:
            The chosen column index, or None when no column is playable.
        """
        start_time = time.time()

        valid_locations = get_valid_locations(board)
        if not valid_locations:
            # Nothing to search: avoid running n_iterations playouts on a full board.
            print(f"Hybrid Player {self.player_id}: No valid moves available.")
            return None

        # --- Immediate Win/Loss Check ---
        winning_col = self._find_winning_column(board, valid_locations, self.player_id)
        if winning_col is not None:
            print(f"Hybrid Player {self.player_id}: Found immediate winning move {winning_col}")
            return winning_col

        blocking_col = self._find_winning_column(board, valid_locations, self.opponent_id)
        if blocking_col is not None:
            print(f"Hybrid Player {self.player_id}: Found immediate block at {blocking_col}")
            return blocking_col

        root = MCTSNode(state=board.copy(), player_at_node=self.player_id)

        # --- MCTS Main Loop ---
        for i in range(self.n_iterations):
            node = root
            current_board_state = board.copy()

            # 1. Selection: descend while the node is fully expanded, replaying each
            #    selected move on the working board.
            while not node.untried_moves and node.children:
                node = node.uct_select_child(self.exploration_constant)
                if node is None: break
                row = get_next_open_row(current_board_state, node.move)
                if row is None: break
                drop_piece(current_board_state, row, node.move, node.parent.player_at_node)
            if node is None: continue

            # 2. Expansion: add one child for a random untried move, unless the
            #    selected node is already a finished game.
            try:
                node_is_terminal = is_terminal_node(node.state)
            except Exception as e_debug:
                print(f"DEBUG (Hybrid P{self.player_id}): Iter {i}, ERROR checking node terminal: {e_debug}")
                node_is_terminal = True # Assume terminal on error

            if node.untried_moves and not node_is_terminal:
                move = random.choice(node.untried_moves)
                current_player = node.player_at_node
                next_player = self.opponent_id if current_player == self.player_id else self.player_id
                row = get_next_open_row(current_board_state, move)
                if row is None:
                    # Column turned out to be full: forget the move and skip this iteration.
                    node.untried_moves.remove(move)
                    continue
                board_after_expansion = current_board_state.copy()
                drop_piece(board_after_expansion, row, move, current_player)
                # The child stores the position AFTER the move; the playout starts there.
                node = node.add_child(move, board_after_expansion, next_player)
                current_board_state = board_after_expansion

            # 3. Simulation (using Q-Agent)
            simulation_result = self._simulate_with_q_agent(current_board_state, node.player_at_node)

            # 4. Backpropagation: simulation_result is from this player's perspective;
            #    each node stores it from the perspective of the player to move there.
            temp_node = node
            while temp_node is not None:
                result_for_node = simulation_result if temp_node.player_at_node == self.player_id else -simulation_result
                temp_node.update(result_for_node)
                temp_node = temp_node.parent

        # --- Choose Best Move ---
        if not root.children:
            print(f"Hybrid Player {self.player_id}: Warning - No moves explored. Choosing random.")
            valid_fallback = get_valid_locations(board)
            return random.choice(valid_fallback) if valid_fallback else None

        # Most-visited child is the final choice.
        best_child = max(root.children, key=lambda c: c.visits)
        best_move = best_child.move
        end_time = time.time()

        # Child score is from the child's player's perspective: negate it, then
        # rescale -1..+1 to 0..100% for display.
        parent_win_rate_for_best_child = (-best_child.score / best_child.visits) if best_child.visits > 0 else 0.0
        win_rate_display = (parent_win_rate_for_best_child + 1) / 2 * 100

        print(f"Hybrid Player {self.player_id}: Chose column {best_move} "
              f"({best_child.visits} visits, ~WinRate: {win_rate_display:.1f}%, "
              f"Time: {end_time - start_time:.2f}s)")

        # Sanity check: never hand an unplayable column back to the caller.
        if best_move not in get_valid_locations(board):
            print(f"Hybrid Warning: Chosen best move {best_move} is invalid! Fallback.")
            valid_fallback = get_valid_locations(board)
            return random.choice(valid_fallback) if valid_fallback else None

        return best_move

# Cell-load confirmation: emitted once, after the classes above have been defined.
print("MCTS_QAgent_Hybrid class defined (with debugging).")

MCTS_QAgent_Hybrid class defined (with debugging).


## THE GAME

In [61]:
# Game Runner
import numpy as np
import time



def play_connect4(player1: Player, player2: Player):
    """
    Runs a game of Connect 4 between two players, printing progress to stdout.

    Player 1 always moves first. On each turn the current player's
    ``get_move`` is called with a copy of the board, the returned column is
    validated, and the piece is dropped into it. The game ends on a win, on a
    full board (draw), or when anything raised during a turn is caught.

    An invalid column forfeits that player's turn, exactly as the printed
    message says. If both players return an invalid column back-to-back the
    game is aborted, so a player that keeps answering with the same bad
    column can no longer stall the loop forever.

    Args:
        player1 (Player): The player object for Player 1.
        player2 (Player): The player object for Player 2.

    Returns:
        None. The outcome is reported through printed messages only.
    """
    board = create_board()
    game_over = False
    turn = 0  # 0 for Player 1, 1 for Player 2
    consecutive_invalid_moves = 0  # Reset every time a piece is actually placed

    print("--- Starting Connect 4 Game ---")
    print(f"Player 1 ({type(player1).__name__}) vs Player 2 ({type(player2).__name__})")
    print_board(board)

    while not game_over:
        current_player_obj = player1 if turn == 0 else player2
        player_piece = PLAYER1_PIECE if turn == 0 else PLAYER2_PIECE

        # Get move from the current player
        try:
            # Pass a copy so a player cannot alter the real board
            col = current_player_obj.get_move(board.copy())

            if is_valid_location(board, col):
                consecutive_invalid_moves = 0
                row = get_next_open_row(board, col)
                drop_piece(board, row, col, player_piece)

                print("-" * 20)
                print(f"Player {player_piece} placed piece in column {col}")
                print_board(board)

                if winning_move(board, player_piece):
                    print(f"\n!!! Player {player_piece} ({type(current_player_obj).__name__}) wins! !!!")
                    game_over = True

                elif len(get_valid_locations(board)) == 0:
                    print("\n!!! Game Over: It's a DRAW! !!!")
                    game_over = True
                else:
                    turn = 1 - turn  # Switch turn

            else:
                print(f"!! Internal Error: Player {player_piece} chose invalid column {col}. Skipping turn. !!")
                consecutive_invalid_moves += 1
                if consecutive_invalid_moves >= 2:
                    # Neither player can produce a legal move: stop instead of looping forever
                    print("!! Both players returned invalid columns in a row. Game cannot continue. !!")
                    game_over = True
                else:
                    # Honour the message above: the offending player loses this turn
                    turn = 1 - turn

        except Exception as e:
            # Any failure inside a turn ends the game rather than crashing the notebook cell
            print(f"\n!! An error occurred during Player {player_piece}'s turn: {e} !!")
            print("Game cannot continue.")
            game_over = True

    print("--- Game Finished ---")

print("Game Runner Function `play_connect4` Loaded.")

Game Runner Function `play_connect4` Loaded.


## Human vs Human Game

In [27]:
# Run Human vs Human Game

# Requires HumanPlayer, play_connect4 and the PLAYER*_PIECE constants from earlier cells

# Each HumanPlayer is built with the piece ID it plays as; human1 is passed
# first to play_connect4, so it takes the first turn.
human1 = HumanPlayer(PLAYER1_PIECE)
human2 = HumanPlayer(PLAYER2_PIECE)

play_connect4(human1, human2)

--- Starting Connect 4 Game ---
Player 1 (HumanPlayer) vs Player 2 (HumanPlayer)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (0, 1, 2, 3, 4, 5, 6):  4


--------------------
Player 1 placed piece in column 4
|               |
|               |
|               |
|               |
|               |
|         X     |
+---------------+
  1 2 3 4 5 6 7



KeyboardInterrupt



## Human vs CNN-Minimax AI Game

In [33]:

# --- Settings for the CNN-backed minimax opponent ---
ai_search_depth = 4  # Larger values search longer but may play stronger
cnn_model_file = 'connect4_cnn_model.h5'

# --- Players: the human holds Player 1's piece, the AI Player 2's ---
human_player = HumanPlayer(PLAYER1_PIECE)
try:
    cnn_ai_player = CNNMinimaxPlayer(
        PLAYER2_PIECE,
        model_path=cnn_model_file,
        search_depth=ai_search_depth,
    )

    # --- Launch the match, unless the player ended up without a model ---
    if cnn_ai_player.model is None:
        print("Cannot start game: CNN AI model failed to load.")
    else:
        play_connect4(human_player, cnn_ai_player)

except NameError:
    print("Error: Make sure CNNMinimaxPlayer class is defined (run Cell 4).")
except Exception as e:
    print(f"An error occurred setting up the Human vs AI game: {e}")





CNN Model loaded successfully from connect4_cnn_model.h5 for Player 2
Model ready.
--- Starting Connect 4 Game ---
Player 1 (HumanPlayer) vs Player 2 (CNNMinimaxPlayer)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|               |
|               |
|       X       |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Running Minimax (depth 4)...
AI Player 2: Chose column 1 (Score: 1.00, Time: 19.52s)
--------------------
Player 2 placed piece in column 1
|               |
|               |
|               |
|               |
|               |
|   O   X       |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  


Invalid input. Please enter a number.


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|               |
|       X       |
|   O   X       |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Running Minimax (depth 4)...
AI Player 2: Chose column 2 (Score: 1.00, Time: 33.86s)
--------------------
Player 2 placed piece in column 2
|               |
|               |
|               |
|               |
|       X       |
|   O O X       |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  1


--------------------
Player 1 placed piece in column 0
|               |
|               |
|               |
|               |
|       X       |
| X O O X       |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Running Minimax (depth 4)...
AI Player 2: Chose column 4 (Score: 1.00, Time: 51.40s)
--------------------
Player 2 placed piece in column 4
|               |
|               |
|               |
|               |
|       X       |
| X O O X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|       X       |
|       X       |
| X O O X O     |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Blocking opponent win in column 3
--------------------
Player 2 placed piece in column 3
|               |
|               |
|       O       |
|       X       |
|       X       |
| X O O X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  1


--------------------
Player 1 placed piece in column 0
|               |
|               |
|       O       |
|       X       |
| X     X       |
| X O O X O     |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Running Minimax (depth 4)...
AI Player 2: Chose column 1 (Score: 1.00, Time: 27.66s)
--------------------
Player 2 placed piece in column 1
|               |
|               |
|       O       |
|       X       |
| X O   X       |
| X O O X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  5


--------------------
Player 1 placed piece in column 4
|               |
|               |
|       O       |
|       X       |
| X O   X X     |
| X O O X O     |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Running Minimax (depth 4)...
AI Player 2: Chose column 1 (Score: 0.99, Time: 20.14s)
--------------------
Player 2 placed piece in column 1
|               |
|               |
|       O       |
|   O   X       |
| X O   X X     |
| X O O X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  2


--------------------
Player 1 placed piece in column 1
|               |
|               |
|   X   O       |
|   O   X       |
| X O   X X     |
| X O O X O     |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Running Minimax (depth 4)...
AI Player 2: Chose column 1 (Score: 0.98, Time: 28.74s)
--------------------
Player 2 placed piece in column 1
|               |
|   O           |
|   X   O       |
|   O   X       |
| X O   X X     |
| X O O X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  6


--------------------
Player 1 placed piece in column 5
|               |
|   O           |
|   X   O       |
|   O   X       |
| X O   X X     |
| X O O X O X   |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Running Minimax (depth 4)...
AI Player 2: Chose column 2 (Score: 1.00, Time: 40.87s)
--------------------
Player 2 placed piece in column 2
|               |
|   O           |
|   X   O       |
|   O   X       |
| X O O X X     |
| X O O X O X   |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  7


--------------------
Player 1 placed piece in column 6
|               |
|   O           |
|   X   O       |
|   O   X       |
| X O O X X     |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Running Minimax (depth 4)...
AI Player 2: Chose column 3 (Score: 0.99, Time: 35.52s)
--------------------
Player 2 placed piece in column 3
|               |
|   O   O       |
|   X   O       |
|   O   X       |
| X O O X X     |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  1


--------------------
Player 1 placed piece in column 0
|               |
|   O   O       |
|   X   O       |
| X O   X       |
| X O O X X     |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Blocking opponent win in column 0
--------------------
Player 2 placed piece in column 0
|               |
|   O   O       |
| O X   O       |
| X O   X       |
| X O O X X     |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  6


--------------------
Player 1 placed piece in column 5
|               |
|   O   O       |
| O X   O       |
| X O   X       |
| X O O X X X   |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Blocking opponent win in column 6
--------------------
Player 2 placed piece in column 6
|               |
|   O   O       |
| O X   O       |
| X O   X       |
| X O O X X X O |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  6


--------------------
Player 1 placed piece in column 5
|               |
|   O   O       |
| O X   O       |
| X O   X   X   |
| X O O X X X O |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Blocking opponent win in column 5
--------------------
Player 2 placed piece in column 5
|               |
|   O   O       |
| O X   O   O   |
| X O   X   X   |
| X O O X X X O |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  7


--------------------
Player 1 placed piece in column 6
|               |
|   O   O       |
| O X   O   O   |
| X O   X   X X |
| X O O X X X O |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7
AI Player 2: Blocking opponent win in column 4
--------------------
Player 2 placed piece in column 4
|               |
|   O   O       |
| O X   O   O   |
| X O   X O X X |
| X O O X X X O |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  7


--------------------
Player 1 placed piece in column 6
|               |
|   O   O       |
| O X   O   O X |
| X O   X O X X |
| X O O X X X O |
| X O O X O X X |
+---------------+
  1 2 3 4 5 6 7

!!! Player 1 (HumanPlayer) wins! !!!
--- Game Finished ---


## Random AI vs Slightly Better Random AI Game

In [37]:

print("\n--- Setting up Random AI vs Random AI Game ---")

# Pits a RandomAIPlayer (Player 1) against a SlightlyBetterRandomAIPlayer (Player 2).
# Player construction and the game itself share one try block, so a missing
# class or function is reported by the handlers below instead of a traceback.
try:
    # --- Create Players ---
    # Player 1: plain RandomAIPlayer
    random_ai_1 = RandomAIPlayer(PLAYER1_PIECE)

    # Player 2: SlightlyBetterRandomAIPlayer (a different class, not a second RandomAIPlayer)
    random_ai_2 = SlightlyBetterRandomAIPlayer(PLAYER2_PIECE)

    # --- Start Game ---
    # The class names are read from the instances, so this line always reflects the real matchup
    print(f"Starting game: {type(random_ai_1).__name__} (P1) vs {type(random_ai_2).__name__} (P2)")
    play_connect4(random_ai_1, random_ai_2)

except NameError as e:
     # A class, constant or function from an earlier cell has not been defined yet
     print(f"Error: A required class or function is not defined: {e}")
     print("Please ensure Cells 1, 5, and 6 have been executed successfully.")
except Exception as e:
     print(f"An unexpected error occurred setting up the Random AI vs Random AI game: {e}")


--- Setting up Random AI vs Random AI Game ---
SlightlyBetterRandomAIPlayer initialized for Player 2 (Opponent: 1)
Starting game: RandomAIPlayer (P1) vs SlightlyBetterRandomAIPlayer (P2)
--- Starting Connect 4 Game ---
Player 1 (RandomAIPlayer) vs Player 2 (SlightlyBetterRandomAIPlayer)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7
Random AI Player 1: Chose column 1
--------------------
Player 1 placed piece in column 1
|               |
|               |
|               |
|               |
|               |
|   X           |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 2 placed piece in column 4
|               |
|               |
|               |
|               |
|               |
|   X     O     |
+---------------+
  1 2 3 4 5 6 7
Random AI Player 1: Chose column 0
--------------------
Player 1 placed piece in column 0
|               |
|               |
|             

## Human vs MCTS AI

In [69]:
import time


print("\n--- Setting up Human vs MCTS AI Game ---")

# --- Configuration ---
# Number of MCTS iterations per move. Higher values mean stronger AI but slower moves.
mcts_iterations = 2000

# Choose who plays first (PLAYER1_PIECE goes first). Only human_player_id needs
# editing: the MCTS player is always given the other piece, so the two players
# can never end up sharing an ID.
human_player_id = PLAYER1_PIECE
mcts_player_id = PLAYER2_PIECE if human_player_id == PLAYER1_PIECE else PLAYER1_PIECE


# --- Create Players ---
try:
    # Create the Human player instance
    human_player = HumanPlayer(human_player_id)
    print(f"Human player created as Player {human_player_id}")

    # Create the MCTS AI player instance
    mcts_ai_opponent = MCTSPlayer(player_id=mcts_player_id, iterations=mcts_iterations)
    print(f"MCTS AI player created as Player {mcts_player_id} with {mcts_iterations} iterations.")

    # --- Start Game ---
    print("\nStarting game...")
    # play_connect4 gives the first turn to its first argument, so seat the
    # holder of PLAYER1_PIECE there.
    if human_player_id == PLAYER1_PIECE:
        play_connect4(human_player, mcts_ai_opponent)
    else:
        play_connect4(mcts_ai_opponent, human_player)

except NameError as e:
    # A class, constant or function from an earlier cell has not been defined yet
    print(f"\nError: A required class or function is not defined: {e}")
    print("Please ensure Cells 1, 2, 6, and 14 have been executed successfully.")
except Exception as e:
    print(f"\nAn unexpected error occurred setting up the Human vs MCTS game: {e}")


--- Setting up Human vs MCTS AI Game ---
Human player created as Player 1
MCTSPlayer initialized for Player 2 (2000 iterations/move, Heuristic Playouts, Corrected UCT)
MCTS AI player created as Player 2 with 2000 iterations.

Starting game...
--- Starting Connect 4 Game ---
Player 1 (HumanPlayer) vs Player 2 (MCTSPlayer)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  3


--------------------
Player 1 placed piece in column 2
|               |
|               |
|               |
|               |
|               |
|     X         |
+---------------+
  1 2 3 4 5 6 7
MCTS Player 2: Chose column 1 (529 visits, ~WinRate: 49.1%, Time: 47.87s)
--------------------
Player 2 placed piece in column 1
|               |
|               |
|               |
|               |
|               |
|   O X         |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  2


--------------------
Player 1 placed piece in column 1
|               |
|               |
|               |
|               |
|   X           |
|   O X         |
+---------------+
  1 2 3 4 5 6 7
MCTS Player 2: Chose column 2 (461 visits, ~WinRate: 43.8%, Time: 43.24s)
--------------------
Player 2 placed piece in column 2
|               |
|               |
|               |
|               |
|   X O         |
|   O X         |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|               |
|   X O         |
|   O X X       |
+---------------+
  1 2 3 4 5 6 7
MCTS Player 2: Chose column 4 (610 visits, ~WinRate: 50.5%, Time: 35.59s)
--------------------
Player 2 placed piece in column 4
|               |
|               |
|               |
|               |
|   X O         |
|   O X X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  5


--------------------
Player 1 placed piece in column 4
|               |
|               |
|               |
|               |
|   X O   X     |
|   O X X O     |
+---------------+
  1 2 3 4 5 6 7
MCTS Player 2: Chose column 2 (1529 visits, ~WinRate: 62.7%, Time: 23.64s)
--------------------
Player 2 placed piece in column 2
|               |
|               |
|               |
|     O         |
|   X O   X     |
|   O X X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  3


--------------------
Player 1 placed piece in column 2
|               |
|               |
|     X         |
|     O         |
|   X O   X     |
|   O X X O     |
+---------------+
  1 2 3 4 5 6 7
MCTS Player 2: Chose column 4 (551 visits, ~WinRate: 54.9%, Time: 18.47s)
--------------------
Player 2 placed piece in column 4
|               |
|               |
|     X         |
|     O   O     |
|   X O   X     |
|   O X X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 1 placed piece in column 3
|               |
|               |
|     X         |
|     O   O     |
|   X O X X     |
|   O X X O     |
+---------------+
  1 2 3 4 5 6 7
MCTS Player 2: Chose column 3 (1939 visits, ~WinRate: 93.1%, Time: 4.82s)
--------------------
Player 2 placed piece in column 3
|               |
|               |
|     X         |
|     O O O     |
|   X O X X     |
|   O X X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  2


--------------------
Player 1 placed piece in column 1
|               |
|               |
|     X         |
|   X O O O     |
|   X O X X     |
|   O X X O     |
+---------------+
  1 2 3 4 5 6 7
MCTS Player 2: Found immediate winning move 4
--------------------
Player 2 placed piece in column 4
|               |
|               |
|     X   O     |
|   X O O O     |
|   X O X X     |
|   O X X O     |
+---------------+
  1 2 3 4 5 6 7

!!! Player 2 (MCTSPlayer) wins! !!!
--- Game Finished ---


## Q-Agent vs MCTS Player

In [173]:
import time


print("\n--- Setting up Q-Agent (P1) vs MCTS Player (P2) Game ---")

# Overview: build a Q-learning agent (Player 1) and an MCTS player (Player 2),
# then run one game. `players_ready` is cleared by any setup failure and gates
# both the MCTS setup and the final game launch.

# --- Configuration ---
# Ensure this matches the file saved during Q-agent training
Q_TABLE_FILE = "connect4_q_agent_selfplay_gen2.pkl"

# Adjust the number of iterations for MCTS. Higher values mean stronger AI but slower moves.
MCTS_ITERATIONS = 2000  

# --- Initialize Players ---
# Both start as None so the launch check at the bottom can tell whether construction happened
q_agent_player1 = None
mcts_player2 = None
players_ready = True

# 1. Initialize Q-Learning Agent (Player 1)
try:
    print(f"Loading Q-Agent for Player 1 from '{Q_TABLE_FILE}'...")
    q_agent_player1 = QLearningAgent(player_id=PLAYER1_PIECE) # Trained as P1, playing as P1

    # Load the trained Q-table
    q_agent_player1.load_q_table(Q_TABLE_FILE)

    # An empty table only produces a warning here: players_ready is left
    # untouched (the line that would clear it is commented out), so the game still runs.
    if not q_agent_player1.q_table:
        print(f"WARNING: Q-table file '{Q_TABLE_FILE}' not found or loaded empty.")
        print("Q-Agent will likely perform poorly (randomly or based on heuristics only).")
        # players_ready = False # Optional: Decide if you want to stop if Q-table is bad

    # Set agent to evaluation mode (no exploration, no learning)
    q_agent_player1.is_learning = False
    q_agent_player1.epsilon = 0.0
    print("Q-Agent (P1) initialized in evaluation mode.")

except NameError:
    print("\nERROR: 'QLearningAgent' class not defined.")
    print("Please ensure the cell containing the QLearningAgent class definition (e.g., Cell 16) has been executed.")
    players_ready = False
except FileNotFoundError:
    # NOTE(review): only reached if load_q_table lets FileNotFoundError propagate - confirm against its definition
    print(f"\nERROR: Q-table file '{Q_TABLE_FILE}' not found.")
    print("Cannot run game without the trained Q-table.")
    players_ready = False
except Exception as e:
    print(f"\nAn unexpected error occurred setting up the Q-Agent: {e}")
    players_ready = False

# 2. Initialize MCTS Player (Player 2)
if players_ready: # Only proceed if Q-agent setup was okay (or warning ignored)
    try:
        print(f"\nInitializing MCTS Player for Player 2 ({MCTS_ITERATIONS} iterations)...")
        mcts_player2 = MCTSPlayer(player_id=PLAYER2_PIECE, iterations=MCTS_ITERATIONS)
        print("MCTS Player (P2) initialized.")

    except NameError:
        print("\nERROR: 'MCTSPlayer' class not defined.")
        print("Please ensure the cell containing the MCTSPlayer class definition (e.g., Cell 14 - corrected version) has been executed.")
        players_ready = False
    except Exception as e:
        print(f"\nAn unexpected error occurred setting up the MCTS Player: {e}")
        players_ready = False

# --- Run Game ---
# Launch only when no setup step failed and both player objects exist
if players_ready and q_agent_player1 and mcts_player2:
    try:
        print("\nStarting game: Q-Agent (P1) vs MCTS Player (P2)")
        # The Q-agent is passed first, so it takes the first turn
        play_connect4(q_agent_player1, mcts_player2)
    except NameError:
         print("\nERROR: 'play_connect4' function not defined.")
         print("Please ensure the cell containing the play_connect4 function (e.g., Cell 6) has been executed.")
    except Exception as e:
         print(f"\nAn unexpected error occurred during the game: {e}")
else:
    print("\nGame setup failed. Cannot start the match.")


--- Setting up Q-Agent (P1) vs MCTS Player (P2) Game ---
Loading Q-Agent for Player 1 from 'connect4_q_agent_selfplay_gen2.pkl'...
Q-table loaded successfully from connect4_q_agent_selfplay_gen2.pkl (24526 entries)
Q-Agent (P1) initialized in evaluation mode.

Initializing MCTS Player for Player 2 (2000 iterations)...
MCTSPlayer initialized for Player 2 (2000 iterations/move, Heuristic Playouts, Corrected UCT)
MCTS Player (P2) initialized.

Starting game: Q-Agent (P1) vs MCTS Player (P2)
--- Starting Connect 4 Game ---
Player 1 (QLearningAgent) vs Player 2 (MCTSPlayer)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|               |
|               |
|       X       |
+---------------+
  1 2 3 4 5 6 7
MCTS Player 2: Chose column 3 (478 visits, ~WinRate: 43.8%, Time: 47.50s)
----------

## New Q-Agent (Self-Play) vs Original Q-Agent

In [138]:
import time


print("\n--- Setting up New Q-Agent (Self-Play) vs Original Q-Agent Game ---")

# Overview: load two Q-learning agents from different Q-table files and play
# them against each other. `players_ready` is cleared by any setup failure and
# gates both the second agent's setup and the final game launch.

# --- Configuration ---
# File for the agent trained via self-play
NEW_Q_TABLE_FILE = "connect4_q_agent_selfplay_gen2.pkl"
# File for the original agent
ORIGINAL_Q_TABLE_FILE = "connect4_q_agent_heuristic.pkl"

# Assign Player IDs
# Let's have the newer agent play as P1 and the original as P2
NEW_AGENT_PLAYER_ID = PLAYER1_PIECE
ORIGINAL_AGENT_PLAYER_ID = PLAYER2_PIECE

# --- Initialize Players ---
q_agent_new = None      # Agent from self-play
q_agent_original = None # Agent from first training round
players_ready = True

# 1. Initialize Newer Agent (e.g., Player 1)
try:
    print(f"\nLoading Newer Q-Agent ({NEW_AGENT_PLAYER_ID}) policy from: {NEW_Q_TABLE_FILE}")
    q_agent_new = QLearningAgent(player_id=NEW_AGENT_PLAYER_ID)

    q_agent_new.load_q_table(NEW_Q_TABLE_FILE)
    # An empty table marks setup as failed. The lines below still run (and
    # still print the "initialized" message), but the match is not started.
    if not q_agent_new.q_table:
        print(f"WARNING: Newer Q-table file '{NEW_Q_TABLE_FILE}' not found or loaded empty.")
        players_ready = False # Stop if the agent we want to test is missing

    # Set to EVALUATION mode and VERBOSE
    q_agent_new.is_learning = False
    q_agent_new.epsilon = 0.0
    q_agent_new.verbose = True
    print(f"Newer Q-Agent (P{NEW_AGENT_PLAYER_ID}) initialized in evaluation mode (VERBOSE).")

except NameError:
    print("\nERROR: 'QLearningAgent' class not defined.")
    players_ready = False
except FileNotFoundError:
    # NOTE(review): only reached if load_q_table lets FileNotFoundError propagate - confirm against its definition
    print(f"\nERROR: Newer Q-table file '{NEW_Q_TABLE_FILE}' not found.")
    players_ready = False
except Exception as e:
    print(f"\nAn unexpected error occurred setting up the Newer Q-Agent: {e}")
    players_ready = False


# 2. Initialize Original Agent (e.g., Player 2)
# Skipped entirely when the newer agent's setup already failed
if players_ready:
    try:
        print(f"\nLoading Original Q-Agent ({ORIGINAL_AGENT_PLAYER_ID}) policy from: {ORIGINAL_Q_TABLE_FILE}")
        # The original agent receives the ID configured above (Player 2 with the current settings)
        q_agent_original = QLearningAgent(player_id=ORIGINAL_AGENT_PLAYER_ID)

        q_agent_original.load_q_table(ORIGINAL_Q_TABLE_FILE)
        if not q_agent_original.q_table:
            print(f"WARNING: Original Q-table file '{ORIGINAL_Q_TABLE_FILE}' not found or loaded empty.")
            players_ready = False # Stop if the opponent is missing

        # Set to EVALUATION mode and VERBOSE
        q_agent_original.is_learning = False
        q_agent_original.epsilon = 0.0
        q_agent_original.verbose = True # Enable detailed printing for this agent too
        print(f"Original Q-Agent (P{ORIGINAL_AGENT_PLAYER_ID}) initialized in evaluation mode (VERBOSE).")
        # NOTE(review): the trailing "P2" in this message is hard-coded; it is only accurate while
        # ORIGINAL_AGENT_PLAYER_ID is PLAYER2_PIECE
        print(f"NOTE: Agent P{ORIGINAL_AGENT_PLAYER_ID} will use state-flipping as it's playing as P2.")


    except NameError:
        print("\nERROR: 'QLearningAgent' class not defined.")
        players_ready = False
    except FileNotFoundError:
        print(f"\nERROR: Original Q-table file '{ORIGINAL_Q_TABLE_FILE}' not found.")
        players_ready = False
    except Exception as e:
        print(f"\nAn unexpected error occurred setting up the Original Q-Agent: {e}")
        players_ready = False


# --- Run Game ---
# Launch only when no setup step failed and both agent objects exist
if players_ready and q_agent_new and q_agent_original:
    try:
        print("\nStarting game: New Q-Agent vs Original Q-Agent")
        # Ensure the player order matches the assigned IDs
        if NEW_AGENT_PLAYER_ID == PLAYER1_PIECE:
            play_connect4(q_agent_new, q_agent_original)
        else:
            play_connect4(q_agent_original, q_agent_new) # Should not happen with current assignment

    except NameError:
         print("\nERROR: 'play_connect4' function not defined.")
         print("Please ensure the cell containing the play_connect4 function (e.g., Cell 6) has been executed.")
    except Exception as e:
         print(f"\nAn unexpected error occurred during the game: {e}")
else:
    print("\nGame setup failed. Cannot start the match.")


--- Setting up New Q-Agent (Self-Play) vs Original Q-Agent Game ---

Loading Newer Q-Agent (1) policy from: connect4_q_agent_selfplay_gen2.pkl
Q-table loaded successfully from connect4_q_agent_selfplay_gen2.pkl (24526 entries)
Newer Q-Agent (P1) initialized in evaluation mode (VERBOSE).

Loading Original Q-Agent (2) policy from: connect4_q_agent_heuristic.pkl
Q-table loaded successfully from connect4_q_agent_heuristic.pkl (307021 entries)
Original Q-Agent (P2) initialized in evaluation mode (VERBOSE).
NOTE: Agent P2 will use state-flipping as it's playing as P2.

Starting game: New Q-Agent vs Original Q-Agent
--- Starting Connect 4 Game ---
Player 1 (QLearningAgent) vs Player 2 (QLearningAgent)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7
Q-Agent 1: Chose column 3 (Exploit, ChosenQ: 1.4666, MaxQ: 1.4666, Time: 0.000s)
--------------------
Player 1 placed piece in column 3
|               |

## Human vs Q-Agent

In [140]:
print("\n--- Evaluating Trained Q-Agent ---")

# Single source of truth for the Q-table path, so the load call and the
# "file not found" message below can never name different files.
TRAINED_Q_TABLE_FILE = "connect4_q_agent_selfplay_gen2.pkl"

try:
    # Load the trained agent
    trained_q_agent = QLearningAgent(player_id=PLAYER1_PIECE) # Or PLAYER2_PIECE if trained as P2
    trained_q_agent.load_q_table(TRAINED_Q_TABLE_FILE)
    trained_q_agent.is_learning = False # Ensure it's not learning
    trained_q_agent.epsilon = 0.0       # Ensure it's not exploring

    if not trained_q_agent.q_table:
        print("Warning: Q-table is empty. Agent has not been trained or failed to load.")
    else:
        # The human takes whichever piece the agent does not hold
        human_opponent = HumanPlayer(PLAYER2_PIECE if trained_q_agent.player_id == PLAYER1_PIECE else PLAYER1_PIECE)

        # play_connect4 hands PLAYER1_PIECE and the first turn to its first
        # argument, so seat the players according to the IDs they were built with.
        if trained_q_agent.player_id == PLAYER1_PIECE:
            play_connect4(trained_q_agent, human_opponent)
        else:
            play_connect4(human_opponent, trained_q_agent)


except NameError as e:
    # A class, constant or function from an earlier cell has not been defined yet
    print(f"Error: A required class or function is not defined: {e}")
    print("Please ensure relevant cells (1, 2, 6, 16) are executed.")
except FileNotFoundError:
    print(f"Error: Cannot evaluate. Trained Q-table file '{TRAINED_Q_TABLE_FILE}' not found.")
except Exception as e:
    print(f"An unexpected error occurred during evaluation: {e}")


--- Evaluating Trained Q-Agent ---
Q-table loaded successfully from connect4_q_agent_selfplay_gen2.pkl (24526 entries)
--- Starting Connect 4 Game ---
Player 1 (QLearningAgent) vs Player 2 (HumanPlayer)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|               |
|               |
|       X       |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 3, 4, 5, 6, 7):  


Invalid input. Please enter a number.


Player 2, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 2 placed piece in column 3
|               |
|               |
|               |
|               |
|       O       |
|       X       |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 4
|               |
|               |
|               |
|               |
|       O       |
|       X X     |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 3, 4, 5, 6, 7):  3


--------------------
Player 2 placed piece in column 2
|               |
|               |
|               |
|               |
|       O       |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|       X       |
|       O       |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 2 placed piece in column 3
|               |
|               |
|       O       |
|       X       |
|       O       |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 3
|               |
|       X       |
|       O       |
|       X       |
|       O       |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 2 placed piece in column 3
|       O       |
|       X       |
|       O       |
|       X       |
|       O       |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 4
|       O       |
|       X       |
|       O       |
|       X       |
|       O X     |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 3, 5, 6, 7):  5


--------------------
Player 2 placed piece in column 4
|       O       |
|       X       |
|       O       |
|       X O     |
|       O X     |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 4
|       O       |
|       X       |
|       O X     |
|       X O     |
|       O X     |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 3, 5, 6, 7):  3


--------------------
Player 2 placed piece in column 2
|       O       |
|       X       |
|       O X     |
|       X O     |
|     O O X     |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 2
|       O       |
|       X       |
|       O X     |
|     X X O     |
|     O O X     |
|     O X X     |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 3, 5, 6, 7):  7


--------------------
Player 2 placed piece in column 6
|       O       |
|       X       |
|       O X     |
|     X X O     |
|     O O X     |
|     O X X   O |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 2
|       O       |
|       X       |
|     X O X     |
|     X X O     |
|     O O X     |
|     O X X   O |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 3, 5, 6, 7):  3


--------------------
Player 2 placed piece in column 2
|       O       |
|     O X       |
|     X O X     |
|     X X O     |
|     O O X     |
|     O X X   O |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 4
|       O       |
|     O X X     |
|     X O X     |
|     X X O     |
|     O O X     |
|     O X X   O |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 3, 5, 6, 7):  3


--------------------
Player 2 placed piece in column 2
|     O O       |
|     O X X     |
|     X O X     |
|     X X O     |
|     O O X     |
|     O X X   O |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 4
|     O O X     |
|     O X X     |
|     X O X     |
|     X X O     |
|     O O X     |
|     O X X   O |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 6, 7):  1


--------------------
Player 2 placed piece in column 0
|     O O X     |
|     O X X     |
|     X O X     |
|     X X O     |
|     O O X     |
| O   O X X   O |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 6
|     O O X     |
|     O X X     |
|     X O X     |
|     X X O     |
|     O O X   X |
| O   O X X   O |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 6, 7):  1


--------------------
Player 2 placed piece in column 0
|     O O X     |
|     O X X     |
|     X O X     |
|     X X O     |
| O   O O X   X |
| O   O X X   O |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 6
|     O O X     |
|     O X X     |
|     X O X     |
|     X X O   X |
| O   O O X   X |
| O   O X X   O |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 6, 7):  1


--------------------
Player 2 placed piece in column 0
|     O O X     |
|     O X X     |
|     X O X     |
| O   X X O   X |
| O   O O X   X |
| O   O X X   O |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 0
|     O O X     |
|     O X X     |
| X   X O X     |
| O   X X O   X |
| O   O O X   X |
| O   O X X   O |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 6, 7):  7


--------------------
Player 2 placed piece in column 6
|     O O X     |
|     O X X     |
| X   X O X   O |
| O   X X O   X |
| O   O O X   X |
| O   O X X   O |
+---------------+
  1 2 3 4 5 6 7
--------------------
Player 1 placed piece in column 1
|     O O X     |
|     O X X     |
| X   X O X   O |
| O   X X O   X |
| O   O O X   X |
| O X O X X   O |
+---------------+
  1 2 3 4 5 6 7


Player 2, choose column (1, 2, 6, 7):  2


--------------------
Player 2 placed piece in column 1
|     O O X     |
|     O X X     |
| X   X O X   O |
| O   X X O   X |
| O O O O X   X |
| O X O X X   O |
+---------------+
  1 2 3 4 5 6 7

!!! Player 2 (HumanPlayer) wins! !!!
--- Game Finished ---


## Hybrid MCTS+QAgent vs Standard MCTS

In [188]:
import time

print("\n--- Setting up Hybrid MCTS+QAgent vs Standard MCTS Game ---")

# --- Configuration ---
# Q-table file the Hybrid agent loads (passed as q_table_path below).
Q_TABLE_TO_USE = "connect4_q_agent_selfplay_gen2.pkl" # Likely your strongest Q-table

# MCTS iterations per move for each player.
# Author's note: the Hybrid might need fewer iterations due to smarter simulations.
HYBRID_ITERATIONS = 2000
STANDARD_MCTS_ITERATIONS = 2000

# Player IDs (here: Hybrid = P1, Standard MCTS = P2). The run section below
# picks the argument order for play_connect4 from these IDs.
HYBRID_PLAYER_ID = PLAYER1_PIECE
STANDARD_MCTS_PLAYER_ID = PLAYER2_PIECE

# --- Initialize Players ---
# players_ready is cleared by any failed setup step so the game is skipped.
hybrid_player = None
standard_mcts_player = None
players_ready = True

# 1. Initialize Hybrid Player
try:
    print(f"\nInitializing Hybrid Player ({HYBRID_PLAYER_ID}) with {HYBRID_ITERATIONS} iterations...")
    hybrid_player = MCTS_QAgent_Hybrid(
        player_id=HYBRID_PLAYER_ID,
        q_table_path=Q_TABLE_TO_USE,
        iterations=HYBRID_ITERATIONS
    )
    print("Hybrid Player initialized.")

except NameError as e:
    # Typically means a defining cell was not executed in this kernel session.
    print(f"\nERROR: Required class not defined ({e}).")
    print("Ensure MCTS_QAgent_Hybrid and potentially QLearningAgent/MCTSNode are defined.")
    players_ready = False
except FileNotFoundError:
    print(f"\nERROR: Q-table file '{Q_TABLE_TO_USE}' not found for Hybrid Agent.")
    players_ready = False
except Exception as e:
    print(f"\nAn unexpected error occurred setting up the Hybrid Player: {e}")
    players_ready = False

# 2. Initialize Standard MCTS Player (skipped if the Hybrid setup failed)
if players_ready:
    try:
        print(f"\nInitializing Standard MCTS Player ({STANDARD_MCTS_PLAYER_ID}) with {STANDARD_MCTS_ITERATIONS} iterations...")
        standard_mcts_player = MCTSPlayer( # Use the standard MCTSPlayer class
            player_id=STANDARD_MCTS_PLAYER_ID,
            iterations=STANDARD_MCTS_ITERATIONS
        )
        print("Standard MCTS Player initialized.")

    except NameError as e:
        print(f"\nERROR: MCTSPlayer class not defined ({e}).")
        print("Please ensure the cell containing the MCTSPlayer class definition (e.g., Cell 14 - corrected version) has been executed.")
        players_ready = False
    except Exception as e:
        print(f"\nAn unexpected error occurred setting up the Standard MCTS Player: {e}")
        players_ready = False


# --- Run Game ---
if players_ready and hybrid_player and standard_mcts_player:
    try:
        print("\nStarting game: Hybrid MCTS+QAgent vs Standard MCTS")
        # The first argument to play_connect4 plays as Player 1, so the
        # argument order must match the assigned IDs.
        if HYBRID_PLAYER_ID == PLAYER1_PIECE:
            play_connect4(hybrid_player, standard_mcts_player)
        else:
            play_connect4(standard_mcts_player, hybrid_player) # If Hybrid is P2

    except NameError as e:
        # A NameError can also come from inside the game loop or a player's
        # move code, so report the actual missing name instead of assuming
        # play_connect4 itself is undefined.
        print(f"\nERROR: Required name not defined ({e}).")
        print("Please ensure the cell containing the play_connect4 function (e.g., Cell 6) has been executed.")
    except Exception as e:
        print(f"\nAn unexpected error occurred during the game: {e}")

else:
    print("\nGame setup failed. Cannot start the match.")


--- Setting up Hybrid MCTS+QAgent vs Standard MCTS Game ---

Initializing Hybrid Player (1) with 2000 iterations...
Initializing Hybrid Agent (Player 1)
Q-table loaded successfully from connect4_q_agent_selfplay_gen2.pkl (24526 entries)
Internal Q-Agent loaded (24526 entries) for Hybrid Player 1.
Hybrid Player initialized.

Initializing Standard MCTS Player (2) with 2000 iterations...
MCTSPlayer initialized for Player 2 (2000 iterations/move, Heuristic Playouts, Corrected UCT)
Standard MCTS Player initialized.

Starting game: Hybrid MCTS+QAgent vs Standard MCTS
--- Starting Connect 4 Game ---
Player 1 (MCTS_QAgent_Hybrid) vs Player 2 (MCTSPlayer)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 1: Chose column 3 (1164 visits, ~WinRate: 63.5%, Time: 10.98s)
--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|               

## Hybrid MCTS+QAgent vs Human

In [205]:
import time


print("\n--- Setting up Human (P2) vs Hybrid MCTS+QAgent (P1) Game ---")

# --- Configuration ---
# Select the Q-table file the Hybrid agent should use
Q_TABLE_TO_USE = "connect4_q_agent_selfplay_gen2.pkl" # Or "connect4_q_agent_heuristic.pkl"

# Set the number of MCTS iterations for the Hybrid player
HYBRID_ITERATIONS = 1500

# Assign Player IDs. The run section below seats each player according to
# these IDs, so the agent's internal player_id always matches its seat.
HYBRID_PLAYER_ID = PLAYER1_PIECE
HUMAN_PLAYER_ID = PLAYER2_PIECE

# --- Initialize Players ---
# players_ready is cleared by any failed setup step so the game is skipped.
hybrid_player = None
human_player = None
players_ready = True

# 1. Initialize Hybrid Player
try:
    print(f"\nInitializing Hybrid Player ({HYBRID_PLAYER_ID}) with {HYBRID_ITERATIONS} iterations...")
    hybrid_player = MCTS_QAgent_Hybrid(
        player_id=HYBRID_PLAYER_ID,
        q_table_path=Q_TABLE_TO_USE,
        iterations=HYBRID_ITERATIONS
    )

    print("Hybrid Player initialized.")

except NameError as e:
    # Typically means a defining cell was not executed in this kernel session.
    print(f"\nERROR: Required class not defined ({e}).")
    print("Ensure MCTS_QAgent_Hybrid, QLearningAgent, MCTSNode classes are defined.")
    players_ready = False
except FileNotFoundError:
    print(f"\nERROR: Q-table file '{Q_TABLE_TO_USE}' not found for Hybrid Agent.")
    players_ready = False
except Exception as e:
    print(f"\nAn unexpected error occurred setting up the Hybrid Player: {e}")
    players_ready = False

# 2. Initialize Human Player (skipped if the Hybrid setup failed)
if players_ready:
    try:
        print(f"\nInitializing Human Player ({HUMAN_PLAYER_ID})...")
        human_player = HumanPlayer(player_id=HUMAN_PLAYER_ID)
        print("Human Player initialized.")
    except NameError as e:
        # Include the missing name: the NameError may originate inside
        # HumanPlayer.__init__ rather than from the class being undefined.
        print(f"\nERROR: HumanPlayer class not defined ({e}).")
        print("Please ensure the cell containing the HumanPlayer class definition (e.g., Cell 2) has been executed.")
        players_ready = False
    except Exception as e:
        print(f"\nAn unexpected error occurred setting up the Human Player: {e}")
        players_ready = False


# --- Run Game ---
if players_ready and hybrid_player and human_player:
    try:
        # The first argument to play_connect4 plays as Player 1. Seat the
        # players by their configured IDs (same rule as the other match
        # cells) so the Hybrid agent never plays a seat that disagrees with
        # its own player_id, and keep the announcement in sync with the seats.
        if HYBRID_PLAYER_ID == PLAYER1_PIECE:
            first_player, second_player = hybrid_player, human_player
            matchup = "Hybrid MCTS+QAgent (P1) vs Human (P2)"
        else:
            first_player, second_player = human_player, hybrid_player
            matchup = "Human (P1) vs Hybrid MCTS+QAgent (P2)"

        print(f"\nStarting game: {matchup}")
        play_connect4(first_player, second_player)
    except NameError as e:
        # A NameError can also come from inside the game loop or a player's
        # move code, so report the actual missing name instead of assuming
        # play_connect4 itself is undefined.
        print(f"\nERROR: Required name not defined ({e}).")
        print("Please ensure the cell containing the play_connect4 function (e.g., Cell 6) has been executed.")
    except Exception as e:
        print(f"\nAn unexpected error occurred during the game: {e}")

else:
    print("\nGame setup failed. Cannot start the match.")


--- Setting up Human (P2) vs Hybrid MCTS+QAgent (P1) Game ---

Initializing Hybrid Player (2) with 1500 iterations...
Initializing Hybrid Agent (Player 2)
Q-table loaded successfully from connect4_q_agent_selfplay_gen2.pkl (24526 entries)
Internal Q-Agent loaded (24526 entries) for Hybrid Player 2.
Hybrid Player initialized.

Initializing Human Player (1)...
Human Player initialized.

Starting game: Hybrid MCTS+QAgent (P1) vs Human (P2)
--- Starting Connect 4 Game ---
Player 1 (MCTS_QAgent_Hybrid) vs Player 2 (HumanPlayer)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 3 (499 visits, ~WinRate: 64.9%, Time: 7.80s)
--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|               |
|               |
|       X       |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 2 placed piece in column 3
|               |
|               |
|               |
|               |
|       O       |
|       X       |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 3 (545 visits, ~WinRate: 64.9%, Time: 6.56s)
--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|       X       |
|       O       |
|       X       |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  6


--------------------
Player 2 placed piece in column 5
|               |
|               |
|               |
|       X       |
|       O       |
|       X   O   |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 3 (903 visits, ~WinRate: 60.7%, Time: 5.82s)
--------------------
Player 1 placed piece in column 3
|               |
|               |
|       X       |
|       X       |
|       O       |
|       X   O   |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  6


--------------------
Player 2 placed piece in column 5
|               |
|               |
|       X       |
|       X       |
|       O   O   |
|       X   O   |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 3 (517 visits, ~WinRate: 59.3%, Time: 4.41s)
--------------------
Player 1 placed piece in column 3
|               |
|       X       |
|       X       |
|       X       |
|       O   O   |
|       X   O   |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  6


--------------------
Player 2 placed piece in column 5
|               |
|       X       |
|       X       |
|       X   O   |
|       O   O   |
|       X   O   |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Found immediate winning move 5
--------------------
Player 1 placed piece in column 5
|               |
|       X       |
|       X   X   |
|       X   O   |
|       O   O   |
|       X   O   |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  7


--------------------
Player 2 placed piece in column 6
|               |
|       X       |
|       X   X   |
|       X   O   |
|       O   O   |
|       X   O O |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Found immediate block at 3
--------------------
Player 1 placed piece in column 3
|       X       |
|       X       |
|       X   X   |
|       X   O   |
|       O   O   |
|       X   O O |
+---------------+
  1 2 3 4 5 6 7

!!! Player 1 (MCTS_QAgent_Hybrid) wins! !!!
--- Game Finished ---


In [210]:
import time


print("\n--- Setting up Human (P1) vs Hybrid MCTS+QAgent (P2) Game ---")

# --- Configuration ---
# Q-table file the Hybrid agent loads (passed as q_table_path below).
Q_TABLE_TO_USE = "connect4_q_agent_selfplay_gen2.pkl"

# Set the number of MCTS iterations for the Hybrid player
HYBRID_ITERATIONS = 1500

# Assign Player IDs (Human = P1, AI = P2). The run section below seats each
# player according to these IDs.
HUMAN_PLAYER_ID = PLAYER1_PIECE
HYBRID_PLAYER_ID = PLAYER2_PIECE

# --- Initialize Players ---
# players_ready is cleared by any failed setup step so the game is skipped.
hybrid_player = None
human_player = None
players_ready = True

# 1. Initialize Hybrid Player
try:
    print(f"\nInitializing Hybrid Player ({HYBRID_PLAYER_ID}) with {HYBRID_ITERATIONS} iterations...")
    hybrid_player = MCTS_QAgent_Hybrid(
        player_id=HYBRID_PLAYER_ID,
        q_table_path=Q_TABLE_TO_USE,
        iterations=HYBRID_ITERATIONS
    )
    print("Hybrid Player initialized.")

except NameError as e:
    # Typically means a defining cell was not executed in this kernel session.
    print(f"\nERROR: Required class not defined ({e}).")
    print("Ensure MCTS_QAgent_Hybrid, QLearningAgent, MCTSNode classes are defined.")
    players_ready = False
except FileNotFoundError:
    print(f"\nERROR: Q-table file '{Q_TABLE_TO_USE}' not found for Hybrid Agent.")
    players_ready = False
except Exception as e:
    print(f"\nAn unexpected error occurred setting up the Hybrid Player: {e}")
    players_ready = False

# 2. Initialize Human Player (skipped if the Hybrid setup failed)
if players_ready:
    try:
        print(f"\nInitializing Human Player ({HUMAN_PLAYER_ID})...")
        human_player = HumanPlayer(player_id=HUMAN_PLAYER_ID)
        print("Human Player initialized.")
    except NameError as e:
        # Include the missing name: the NameError may originate inside
        # HumanPlayer.__init__ rather than from the class being undefined.
        print(f"\nERROR: HumanPlayer class not defined ({e}).")
        print("Please ensure the cell containing the HumanPlayer class definition (e.g., Cell 2) has been executed.")
        players_ready = False
    except Exception as e:
        print(f"\nAn unexpected error occurred setting up the Human Player: {e}")
        players_ready = False


# --- Run Game ---
if players_ready and hybrid_player and human_player:
    try:
        # The first argument to play_connect4 plays as Player 1. Seat the
        # players by their configured IDs and derive the announcement from
        # the same branch so the printed matchup always matches the seats.
        if HYBRID_PLAYER_ID == PLAYER1_PIECE:
            first_player, second_player = hybrid_player, human_player
            matchup = "Hybrid MCTS+QAgent (P1) vs Human (P2)"
        else:
            first_player, second_player = human_player, hybrid_player
            matchup = "Human (P1) vs Hybrid MCTS+QAgent (P2)"

        print(f"\nStarting game: {matchup}")
        play_connect4(first_player, second_player)

    except NameError as e:
        # A NameError can also come from inside the game loop or a player's
        # move code, so report the actual missing name instead of assuming
        # play_connect4 itself is undefined.
        print(f"\nERROR: Required name not defined ({e}).")
        print("Please ensure the cell containing the play_connect4 function (e.g., Cell 6) has been executed.")
    except Exception as e:
        print(f"\nAn unexpected error occurred during the game: {e}")

else:
    print("\nGame setup failed. Cannot start the match.")




--- Setting up Human (P1) vs Hybrid MCTS+QAgent (P2) Game ---

Initializing Hybrid Player (2) with 1500 iterations...
Initializing Hybrid Agent (Player 2)
Q-table loaded successfully from connect4_q_agent_selfplay_gen2.pkl (24526 entries)
Internal Q-Agent loaded (24526 entries) for Hybrid Player 2.
Hybrid Player initialized.

Initializing Human Player (1)...
Human Player initialized.

Starting game: Human (P1) vs Hybrid MCTS+QAgent (P2)
--- Starting Connect 4 Game ---
Player 1 (HumanPlayer) vs Player 2 (MCTS_QAgent_Hybrid)
|               |
|               |
|               |
|               |
|               |
|               |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|               |
|               |
|       X       |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 4 (686 visits, ~WinRate: 40.1%, Time: 7.20s)
--------------------
Player 2 placed piece in column 4
|               |
|               |
|               |
|               |
|               |
|       X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 1 placed piece in column 3
|               |
|               |
|               |
|               |
|       X       |
|       X O     |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 3 (1107 visits, ~WinRate: 40.2%, Time: 5.86s)
--------------------
Player 2 placed piece in column 3
|               |
|               |
|               |
|       O       |
|       X       |
|       X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 1 placed piece in column 3
|               |
|               |
|       X       |
|       O       |
|       X       |
|       X O     |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 1 (535 visits, ~WinRate: 40.1%, Time: 6.26s)
--------------------
Player 2 placed piece in column 1
|               |
|               |
|       X       |
|       O       |
|       X       |
|   O   X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  4


--------------------
Player 1 placed piece in column 3
|               |
|       X       |
|       X       |
|       O       |
|       X       |
|   O   X O     |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 4 (1106 visits, ~WinRate: 53.0%, Time: 4.13s)
--------------------
Player 2 placed piece in column 4
|               |
|       X       |
|       X       |
|       O       |
|       X O     |
|   O   X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  5


--------------------
Player 1 placed piece in column 4
|               |
|       X       |
|       X       |
|       O X     |
|       X O     |
|   O   X O     |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 4 (804 visits, ~WinRate: 46.1%, Time: 3.91s)
--------------------
Player 2 placed piece in column 4
|               |
|       X       |
|       X O     |
|       O X     |
|       X O     |
|   O   X O     |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  7


--------------------
Player 1 placed piece in column 6
|               |
|       X       |
|       X O     |
|       O X     |
|       X O     |
|   O   X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 2 (423 visits, ~WinRate: 49.1%, Time: 2.90s)
--------------------
Player 2 placed piece in column 2
|               |
|       X       |
|       X O     |
|       O X     |
|       X O     |
|   O O X O   X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  3


--------------------
Player 1 placed piece in column 2
|               |
|       X       |
|       X O     |
|       O X     |
|     X X O     |
|   O O X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 1 (1013 visits, ~WinRate: 39.5%, Time: 3.03s)
--------------------
Player 2 placed piece in column 1
|               |
|       X       |
|       X O     |
|       O X     |
|   O X X O     |
|   O O X O   X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  2


--------------------
Player 1 placed piece in column 1
|               |
|       X       |
|       X O     |
|   X   O X     |
|   O X X O     |
|   O O X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 4 (436 visits, ~WinRate: 33.0%, Time: 2.84s)
--------------------
Player 2 placed piece in column 4
|               |
|       X O     |
|       X O     |
|   X   O X     |
|   O X X O     |
|   O O X O   X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 4, 5, 6, 7):  5


--------------------
Player 1 placed piece in column 4
|         X     |
|       X O     |
|       X O     |
|   X   O X     |
|   O X X O     |
|   O O X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 3 (862 visits, ~WinRate: 35.8%, Time: 1.64s)
--------------------
Player 2 placed piece in column 3
|       O X     |
|       X O     |
|       X O     |
|   X   O X     |
|   O X X O     |
|   O O X O   X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 6, 7):  1


--------------------
Player 1 placed piece in column 0
|       O X     |
|       X O     |
|       X O     |
|   X   O X     |
|   O X X O     |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 6 (683 visits, ~WinRate: 29.0%, Time: 1.44s)
--------------------
Player 2 placed piece in column 6
|       O X     |
|       X O     |
|       X O     |
|   X   O X     |
|   O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 6, 7):  7


--------------------
Player 1 placed piece in column 6
|       O X     |
|       X O     |
|       X O     |
|   X   O X   X |
|   O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 6 (713 visits, ~WinRate: 27.6%, Time: 1.45s)
--------------------
Player 2 placed piece in column 6
|       O X     |
|       X O     |
|       X O   O |
|   X   O X   X |
|   O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 6, 7):  1


--------------------
Player 1 placed piece in column 0
|       O X     |
|       X O     |
|       X O   O |
|   X   O X   X |
| X O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 6 (772 visits, ~WinRate: 22.2%, Time: 1.20s)
--------------------
Player 2 placed piece in column 6
|       O X     |
|       X O   O |
|       X O   O |
|   X   O X   X |
| X O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 6, 7):  2


--------------------
Player 1 placed piece in column 1
|       O X     |
|       X O   O |
|   X   X O   O |
|   X   O X   X |
| X O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 1 (690 visits, ~WinRate: 16.7%, Time: 0.65s)
--------------------
Player 2 placed piece in column 1
|       O X     |
|   O   X O   O |
|   X   X O   O |
|   X   O X   X |
| X O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 2, 3, 6, 7):  2


--------------------
Player 1 placed piece in column 1
|   X   O X     |
|   O   X O   O |
|   X   X O   O |
|   X   O X   X |
| X O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Chose column 6 (538 visits, ~WinRate: 6.7%, Time: 0.39s)
--------------------
Player 2 placed piece in column 6
|   X   O X   O |
|   O   X O   O |
|   X   X O   O |
|   X   O X   X |
| X O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7


Player 1, choose column (1, 3, 6):  3


--------------------
Player 1 placed piece in column 2
|   X   O X   O |
|   O   X O   O |
|   X   X O   O |
|   X X O X   X |
| X O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7
Hybrid Player 2: Found immediate winning move 2
--------------------
Player 2 placed piece in column 2
|   X   O X   O |
|   O   X O   O |
|   X O X O   O |
|   X X O X   X |
| X O X X O   O |
| X O O X O   X |
+---------------+
  1 2 3 4 5 6 7

!!! Player 2 (MCTS_QAgent_Hybrid) wins! !!!
--- Game Finished ---


In [None]:
# NOTE(review): this never-executed cell contains only the literal 3 and
# looks like leftover scratch input — consider deleting it.
3
