In [1]:
import random
import pandas as pd

In [2]:
class Board(list):
    def __init__(self, board=None):
        arr = [[cell for cell in row] for row in board] if board else [[" " for _ in range(7)] for _ in range(6)]
        super().__init__(arr)
    
    def to_string(self):
        line_break = '–––––––––––––––––––––––––––––'
        rows = ['| ' + ' | '.join([cell for cell in row]) + ' |' for row in self]
        output = line_break + '\n' + f'\n{line_break}\n'.join(rows) + '\n' + line_break
        return output
    
    def to_tuple(self):
        tup = tuple([tuple([cell for cell in row]) for row in self])
        return tup
    
#     def from_tuple(self, tup):
#         arr = [[cell for cell in row] for row in self]
#         board = Board(arr)
            
    def __repr__(self):
        output = self.to_string()
        return output
    
    def __str__(self):
        output = self.to_string()
        return output
    
    def __hash__(self):
        hashed_tup = hash(self.to_tuple())
        return hashed_tup

In [3]:
class Player():
    def __init__(self, player=None):
        self.player = player if player else 0
        self.piece = 'X' if self.player == 0 else 'O'

In [4]:
class Connect4:
    def __init__(self, board=None, current_player=None):
#         self.board = [row.copy() for row in board] if board else [[" " for _ in range(7)] for _ in range(6)]
        self.board = Board(board) if board else Board()
        self.current_player = current_player if current_player else 0
        self.piece = 'X' if self.current_player == 0 else 'O'

    def is_valid_move(self, column):
        return 0 <= column < 7 and self.board[0][column] == " "
    
    def get_valid_moves(self):
        moves = [3, 2, 4, 1, 5, 0, 6] # range(7)
        valid_moves = [move for move in moves if self.is_valid_move(move)]
        return valid_moves

    def make_move(self, column):
        for row in range(5, -1, -1):
            if self.board[row][column] == " ":
                self.board[row][column] = self.piece
                self.current_player = 1 - self.current_player
                self.piece = 'X' if self.current_player == 0 else 'O'
                return True
        print('Invalid column')
        return False
                
    def check_winner(self):
        # Check horizontal
        for row in range(6):
            for col in range(4):
                if all(self.board[row][col + i] == 'X' for i in range(4)):
                    return 0
                elif all(self.board[row][col + i] == 'O' for i in range(4)):
                    return 1

        # Check vertical
        for row in range(3):
            for col in range(7):
                if all(self.board[row + i][col] == 'X' for i in range(4)):
                    return 0
                elif all(self.board[row + i][col] == 'O' for i in range(4)):
                    return 1

        # Check diagonals (positive slope)
        for row in range(3):
            for col in range(4):
                if all(self.board[row + i][col + i] == 'X' for i in range(4)):
                    return 0
                elif all(self.board[row + i][col + i] == 'O' for i in range(4)):
                    return 1

        # Check diagonals (negative slope)
        for row in range(3):
            for col in range(3, 7):
                if all(self.board[row + i][col - i] == 'X' for i in range(4)):
                    return 0
                elif all(self.board[row + i][col - i] == 'O' for i in range(4)):
                    return 1

        return -1

    def is_board_full(self):
        return all(cell != " " for row in self.board for cell in row)

    def is_game_over(self):
        if self.is_board_full() or self.check_winner() != -1:
            return True
        return False

    def battle(self, agent1, agent2, output=True):
        while True:
            if output:
                print(self.board)
            
#             try:
            if self.current_player == 0:
                column = agent1.choose_move(self)
                if output:
                    print(f"{agent1} chooses column {column}.")
            else:
                column = agent2.choose_move(self)
                if output:
                    print(f"{agent2} chooses column {column}.")

            if self.is_valid_move(column):
                self.make_move(column)
                
                check_winner = self.check_winner()
                if check_winner != -1:
                    if output:
                        print(self.board)
                    winner = agent1 if check_winner == 0 else agent2
                    print(f"{winner} wins!")
                    break
                elif self.is_board_full():
                    if output:
                        print(self.board)
                    print("It's a draw!")
                    break

#                 self.current_player = 1 - self.current_player
#                 self.piece = 'X' if self.current_player == 0 else 'O'
            else:
                print("Invalid move. Try again.")

In [5]:
# class Player():
#     def __init__(self, player=None):
#         self.player
#         self.piece = 'X' if player.piece == 0 else 'O'

In [6]:
class Human:
    def __str__(self):
        return "Human"
    
    def __repr__(self):
        return "Human"
    
    def choose_move(self, game):
        message = f"Player {game.current_player+1}, choose a column (0-6): "
        column = int(input(message))
        return column

In [7]:
class RandomAgent:
    def __str__(self):
        return "Random Agent"
    
    def __repr__(self):
        return "Random Agent"
    
    def choose_move(self, game):
        valid_moves = game.get_valid_moves()
        column = random.choice(valid_moves)
        return column

In [8]:
class NegamaxAgent:
    def __init__(self, depth):
        self.depth = depth
        self.lookup = {}
        
    def __str__(self):
        return f"Negamax Agent {self.depth}"
    
    def __repr__(self):
        return f"Negamax Agent {self.depth}"

    # A simple evaluation function that computes the difference in scores
    def evaluate_board(self, board, color):
        windows = []
                
        # Check horizontal
        for row in range(6):
            for col in range(4):
                window = [board[row][col + i] for i in range(4)]
                windows.append(window)

        # Check vertical
        for row in range(3):
            for col in range(7):
                window = [board[row + i][col] for i in range(4)]
                windows.append(window)

        # Check diagonals (positive slope)
        for row in range(3):
            for col in range(4):
                window = [board[row + i][col + i] for i in range(4)]
                windows.append(window)

        # Check diagonals (negative slope)
        for row in range(3):
            for col in range(3, 7):
                window = [board[row + i][col - i] for i in range(4)]
                windows.append(window)        
    
        weights = {1: 1, 2: 3, 3: 9, 4: 81}
        
        evaluation = 0
        for length, weight in weights.items():
            count = {}
            for piece in ('X','O'):
                count[piece] = 0
                for window in windows:
                    if window.count(piece) == length and window.count(" ") == (4 - length):
                        count[piece] += 1
            evaluation += weight * (count['X'] - count['O'])
        evaluation *= color
        
#         print(f'{"".join(["-" for _ in range(self.depth + 1)]) + ">"} Evaluation {evaluation}')
                                
        return evaluation

    def negamax(self, node, depth, alpha, beta, color):
        if depth == 0 or not node.get_valid_moves() or node.check_winner() > -1:
            return -1, self.evaluate_board(node.board, color)

        move_evals = {}
        for move in node.get_valid_moves():
            child_node = Connect4(node.board, node.current_player)
            child_node.make_move(move)
#             print(f'{"".join(["-" for _ in range(self.depth - depth + 1)]) + ">"} Depth {depth}, Player {node.current_player+1}, Move {move}')
#             display_board(child_node.board)     
            
            eval = float('-inf')
            lookup_key = (Board(node.board), depth, color, move)
            if lookup_key in self.lookup:
                eval = self.lookup[lookup_key]
            else:
                eval = -self.negamax(child_node, depth-1, -beta, -alpha, -color)[1]
                self.lookup[lookup_key] = eval
            move_evals[move] = eval
            alpha = max(alpha, eval)
            if beta <= alpha: # Beta cut-off
                break
                
            if depth == self.depth:
                print(move, eval)
                
        max_eval = max(move_evals.values())
        max_moves = [move for move, eval in move_evals.items() if eval == max_eval]
        best_move = max_moves[0] # random.choice(max_moves) # max_moves[0]
        
        return best_move, max_eval

    def choose_move(self, node):
        color = 1 - 2 * node.current_player # Player 0 -> 1, Player 1 -> -1
        best_move, max_eval = self.negamax(node, self.depth, float('-inf'), float('inf'), color)
        return best_move

In [11]:
class Connect4Q(Connect4):
    def battle_q(self, q_agent, opponent_agent, q_agent_first=True, output=True):
        while not self.is_game_over():
            current_agent = q_agent if self.current_player == (0 if q_agent_first else 1) else opponent_agent
            is_q_agent_turn = isinstance(current_agent, QLearningAgent)

            # Get the current state before making a move
            old_state = q_agent.generate_state(self) if is_q_agent_turn else None
            
            # Agent makes a move
            column = current_agent.choose_move(self)
            if output:
                print(f"{current_agent} chooses column {column}.")

            # Store the board before the move to calculate rewards
            self.make_move(column)

            # Calculate reward and update Q-table if it's q_agent's turn
            if is_q_agent_turn:
#                 print(old_state)
                reward = self.calculate_reward(old_state, q_agent, opponent_agent)
                new_state = q_agent.generate_state(self)
                q_agent.learn(old_state, column, reward, new_state)
                
            if output:
                print(self.board)
                
        q_agent.update_exploration_rate()
        
        winner = self.check_winner()
        if self.is_board_full():
            return 0
        elif (winner == 0 and q_agent_first) or (winner == 1 and not q_agent_first):
            return 1
        else:
            return -1

        # Assign endgame rewards based on q_agent's position
#         self.assign_endgame_rewards(q_agent, opponent_agent, q_agent_first, output)

    def calculate_reward(self, old_state, q_agent, other_agent):
        # Implement the logic to calculate the intermediate rewards
        color = 1 - 2 * self.current_player # Player 0 -> 1, Player 1 -> -1
        reward = NegamaxAgent(0).evaluate_board(self.board, color)
#         print(reward)
        return reward

In [12]:
class QLearningAgent:
    def __init__(self, learning_rate=0.1, discount_factor=0.99, exploration_rate=1.0, exploration_decay=0.9999):
        self.q_table = {}
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay

    def choose_move(self, game):
        state = self.generate_state(game)
        if random.random() < self.exploration_rate:
            move = random.choice(game.get_valid_moves())
        else:
            move = self.best_known_move(state, game.get_valid_moves())
        return move

    def learn(self, old_state, action, reward, new_state):
        # Calculate the max future value
        valid_moves = Connect4(Board(new_state[0])).get_valid_moves()
        future_rewards = [self.q_table.get((new_state, a), 0) for a in valid_moves]
        max_future_reward = max(future_rewards) if future_rewards else 0

        # Q-learning update rule
        old_value = self.q_table.get((old_state, action), 0)   
        new_value = old_value + self.learning_rate * (reward + self.discount_factor * max_future_reward - old_value)
        self.q_table[(old_state, action)] = new_value

    def generate_state(self, game):
        # Convert the game board to a tuple and include the current player
        board = Board(game.board)
        return (board, game.current_player)

    def best_known_move(self, state, valid_moves):
        best_move = None
        max_value = float('-inf')

        for move in valid_moves:
            value = self.q_table.get((state, move), 0)
            if value > max_value:
                max_value = value
                best_move = move

        # If all moves have the same value or state is new, choose randomly
        return best_move if best_move is not None else random.choice(valid_moves)

    def update_exploration_rate(self):
        self.exploration_rate *= self.exploration_decay
        
    def __str__(self):
        return "Q-Learning Agent"
    
    def __repr__(self):
        return "Q-Learning Agent"

# Additional methods and adjustments can be added as needed, especially to handle rewards and game-specific logic.

In [15]:
human = Human()
random_agent = RandomAgent()
negamax_agent = NegamaxAgent(8)
q_learning_agent = QLearningAgent()

In [16]:
connect4 = Connect4()
connect4.battle(human, negamax_agent)

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
Player 1, choose a column (0-6): 3
Human chooses column 3.
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   |
–––––––––––––––––––––––––––––
3 -10
2 -10
4 -10
1 -10
5 -10
0 -10
6 -10
Negamax Agent 8 chooses column 3.
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––

Player 1, choose a column (0-6): 1
Human chooses column 1.
–––––––––––––––––––––––––––––
|   |   |   | O | X |   |   |
–––––––––––––––––––––––––––––
|   |   |   | X | O |   |   |
–––––––––––––––––––––––––––––
|   | X |   | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O |   | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X |   | O | X |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | O | O | X |
–––––––––––––––––––––––––––––
2 -18
1 1
5 0
0 0
6 1
Negamax Agent 8 chooses column 1.
–––––––––––––––––––––––––––––
|   |   |   | O | X |   |   |
–––––––––––––––––––––––––––––
|   | O |   | X | O |   |   |
–––––––––––––––––––––––––––––
|   | X |   | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O |   | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X |   | O | X |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | O | O | X |
–––––––––––––––––––––––––––––
Player 1, choose a column (0-6): 1
Human chooses column 1.
–––––––––––––––––––––––––––––
|   | X |   | O 

In [268]:
connect4.battle(human, negamax_agent)

–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O 

Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O

–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O 

–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––

Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––

Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O

Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O

Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––

Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
––––––––––––

Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O

Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O

Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––

Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––

–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O 

–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––

Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O

Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O | O | O | O |   |   |
–––––––––––––––––––––––––––––
|   | O | X | X | X |   |   |
–––––––––––––––––––––––––––––
|   | X | X | O | O | X |   |
–––––––––––––––––––––––––––––
|   | O | O | X | X | X |   |
–––––––––––––––––––––––––––––
|   | O | X | O | O | X |   |
–––––––––––––––––––––––––––––
| X | O | X | X | X | O |   |
–––––––––––––––––––––––––––––
Negamax Agent 10 chooses column -1.
Invalid move. Try again.
–––––––––––––––––––––––––––––
|   | O

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [237]:
for episode in range(1,1001):
    connect4Q = Connect4Q()
    wi1nner = connect4Q.battle_q(q_learning_agent, negamax_agent, q_agent_first=False, output=False)
    print(episode, q_learning_agent.exploration_rate, len(q_learning_agent.q_table), winner)

1 0.9999 11 -1
2 0.9998000100000001 23 -1
3 0.9997000299990001 26 -1
4 0.9996000599960002 35 -1
5 0.9995000999900007 41 -1
6 0.9994001499800017 47 -1
7 0.9993002099650037 51 -1
8 0.9992002799440072 54 -1
9 0.9991003599160128 56 -1
10 0.9990004498800211 61 -1
11 0.9989005498350332 65 -1
12 0.9988006597800497 67 -1
13 0.9987007797140718 70 -1
14 0.9986009096361004 76 -1
15 0.9985010495451367 77 -1
16 0.9984011994401822 80 -1
17 0.9983013593202382 82 -1
18 0.9982015291843062 85 -1
19 0.9981017090313877 91 -1
20 0.9980018988604846 94 -1
21 0.9979020986705985 97 -1
22 0.9978023084607315 104 -1
23 0.9977025282298854 114 -1
24 0.9976027579770624 119 -1
25 0.9975029977012647 131 -1
26 0.9974032474014946 134 -1
27 0.9973035070767544 136 -1
28 0.9972037767260468 139 -1
29 0.9971040563483742 140 -1
30 0.9970043459427393 146 -1
31 0.9969046455081451 148 -1
32 0.9968049550435942 149 -1
33 0.9967052745480899 155 -1
34 0.9966056040206351 157 -1
35 0.9965059434602331 162 -1
36 0.9964062928658871 166 -

281 0.9722897666671897 788 -1
282 0.9721925376905229 788 -1
283 0.9720953184367539 792 -1
284 0.9719981089049102 798 -1
285 0.9719009090940197 803 -1
286 0.9718037190031104 803 -1
287 0.97170653863121 808 -1
288 0.9716093679773469 809 -1
289 0.9715122070405492 812 -1
290 0.9714150558198451 812 -1
291 0.9713179143142632 812 -1
292 0.9712207825228317 813 -1
293 0.9711236604445794 813 -1
294 0.971026548078535 815 -1
295 0.9709294454237271 815 -1
296 0.9708323524791846 815 -1
297 0.9707352692439367 816 -1
298 0.9706381957170124 820 -1
299 0.9705411318974407 820 -1
300 0.970444077784251 820 -1
301 0.9703470333764725 823 -1
302 0.9702499986731349 825 -1
303 0.9701529736732676 825 -1
304 0.9700559583759003 827 -1
305 0.9699589527800627 828 -1
306 0.9698619568847847 829 -1
307 0.9697649706890963 831 -1
308 0.9696679941920274 839 -1
309 0.9695710273926083 839 -1
310 0.969474070289869 839 -1
311 0.9693771228828401 839 -1
312 0.9692801851705518 839 -1
313 0.9691832571520348 839 -1
314 0.969086338

554 0.9461040087165637 1280 -1
555 0.9460093983156921 1280 -1
556 0.9459147973758605 1282 -1
557 0.9458202058961229 1282 -1
558 0.9457256238755333 1282 -1
559 0.9456310513131457 1288 -1
560 0.9455364882080144 1296 -1
561 0.9454419345591936 1296 -1
562 0.9453473903657377 1296 -1
563 0.9452528556267011 1304 -1
564 0.9451583303411385 1307 -1
565 0.9450638145081044 1307 -1
566 0.9449693081266536 1310 -1
567 0.9448748111958409 1316 -1
568 0.9447803237147213 1317 -1
569 0.9446858456823498 1319 -1
570 0.9445913770977816 1322 -1
571 0.9444969179600718 1322 -1
572 0.9444024682682759 1322 -1
573 0.9443080280214491 1323 -1
574 0.944213597218647 1323 -1
575 0.9441191758589251 1323 -1
576 0.9440247639413393 1328 -1
577 0.9439303614649451 1328 -1
578 0.9438359684287987 1328 -1
579 0.9437415848319558 1329 -1
580 0.9436472106734726 1330 -1
581 0.9435528459524052 1330 -1
582 0.94345849066781 1334 -1
583 0.9433641448187432 1334 -1
584 0.9432698084042613 1340 -1
585 0.9431754814234209 1342 -1
586 0.94308

823 0.9209918284199904 1719 -1
824 0.9208997292371484 1721 -1
825 0.9208076392642246 1722 -1
826 0.9207155585002982 1722 -1
827 0.9206234869444482 1722 -1
828 0.9205314245957538 1725 -1
829 0.9204393714532942 1725 -1
830 0.9203473275161489 1728 -1
831 0.9202552927833972 1728 -1
832 0.9201632672541189 1728 -1
833 0.9200712509273935 1729 -1
834 0.9199792438023008 1739 -1
835 0.9198872458779206 1739 -1
836 0.9197952571533328 1739 -1
837 0.9197032776276175 1739 -1
838 0.9196113072998547 1744 -1
839 0.9195193461691248 1744 -1
840 0.9194273942345079 1746 -1
841 0.9193354514950844 1746 -1
842 0.919243517949935 1748 -1
843 0.91915159359814 1753 -1
844 0.9190596784387802 1753 -1
845 0.9189677724709363 1761 -1
846 0.9188758756936892 1763 -1
847 0.9187839881061198 1763 -1
848 0.9186921097073092 1764 -1
849 0.9186002404963385 1767 -1
850 0.9185083804722889 1768 -1
851 0.9184165296342417 1768 -1
852 0.9183246879812783 1773 -1
853 0.9182328555124802 1773 -1
854 0.918141032226929 1773 -1
855 0.918049

In [238]:
connect4 = Connect4()
connect4.battle(human, q_learning_agent)

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
Player 1, choose a column (0-6): 3
Human chooses column 3.
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   |
–––––––––––––––––––––––––––––
Q-Learning Agent chooses column 3.
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
––––––

Player 1, choose a column (0-6): 2
Human chooses column 2.
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   | O |   |   |   |
–––––––––––––––––––––––––––––
| O |   | X | X |   |   |   |
–––––––––––––––––––––––––––––
| X | X | O | X |   |   |   |
–––––––––––––––––––––––––––––
| O | X | X | O |   |   |   |
–––––––––––––––––––––––––––––
| X | X | O | X | O | O | O |
–––––––––––––––––––––––––––––
Q-Learning Agent chooses column 0.
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
| O |   |   | O |   |   |   |
–––––––––––––––––––––––––––––
| O |   | X | X |   |   |   |
–––––––––––––––––––––––––––––
| X | X | O | X |   |   |   |
–––––––––––––––––––––––––––––
| O | X | X | O |   |   |   |
–––––––––––––––––––––––––––––
| X | X | O | X | O | O | O |
–––––––––––––––––––––––––––––
Player 1, choose a column (0-6): 3
Human chooses column 3.
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   |
–––––––

In [115]:
for k, v in q_learning_agent.q_table.items():
    print(k)
    print(v)

((–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   |
–––––––––––––––––––––––––––––, 1), 3)
-0.30000000000000004
((–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   | O |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   |
–––––––––––––––––––––––––––––, 1), 4)
0.30000000000000004
((–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |  

In [145]:
for k, v in q_learning_agent.q_table.items():
    print(k)
    print(v)

((–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   | X |   |   |   |   |   |
–––––––––––––––––––––––––––––, 1), 0)
0.9999999999999996
((–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
| O | X |   |   | X |   |   |
–––––––––––––––––––––––––––––, 1), 3)
-0.34390000000000004
((–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   

In [148]:
board = connect4.board
board

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   |
–––––––––––––––––––––––––––––

In [157]:
q_learning_agent.q_table[((board, 1),1)]

KeyError: ((–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   |
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   |
–––––––––––––––––––––––––––––, 1), 1)

In [187]:
len(q_learning_agent.q_table)

9877

In [110]:
human = Human()
random_agent = RandomAgent()
negamax_agent = NegamaxAgent(4)

connect4 = Connect4()
connect4.battle(human, negamax_agent)


–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
Player 1, choose a column (0-6): 3
Human chooses column 3.

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   | 
–––––––––––––––––––––––––––––
0 -1.1
1 -1.1
2 -1.0
3 -0.6000000000000001
4 -0.7
5 -0.6000000000000001
6 -0.6000000000000001
Negamax Agent 4 chooses column 3.

––––––––––––––––––

In [95]:
arr = [[1,2,3],[4,5,6],[7,8,9]]
arr

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [73]:
arr[0]

[1, 2, 3]

In [74]:
connect4.board

[[' ', ' ', ' ', ' ', ' ', ' ', ' '],
 [' ', ' ', ' ', ' ', ' ', ' ', ' '],
 [' ', ' ', ' ', ' ', ' ', ' ', ' '],
 ['O', ' ', ' ', 'X', ' ', ' ', ' '],
 ['O', ' ', ' ', 'X', ' ', ' ', ' '],
 ['O', ' ', ' ', 'X', ' ', ' ', ' ']]

In [76]:
connect4.board[2]

[' ', ' ', ' ', ' ', ' ', ' ', ' ']

In [77]:
for row in range(3):
    for col in range(7):
        window = [connect4.board[row + i][col] for i in range(4)]
        print(window)

[' ', ' ', ' ', 'O']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', 'X']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', ' ']
[' ', ' ', 'O', 'O']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', ' ']
[' ', ' ', 'X', 'X']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', ' ']
[' ', 'O', 'O', 'O']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', ' ']
[' ', 'X', 'X', 'X']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', ' ']
[' ', ' ', ' ', ' ']


In [128]:
def display_board(arr):
        print('\n–––––––––––––––––––––––––––––')
        for row in arr:
            print("|", end=" ")
            for cell in row:
                print(cell, end=" | ")
            print('\n–––––––––––––––––––––––––––––')

            
# Check horizontal
for row in range(6):
    for col in range(4):
        new_board = [[" " for _ in range(7)] for _ in range(6)]
        for i in range(4):
            new_board[row][col + i] = 'X'
        display_board(new_board)

# Check vertical
for row in range(3):
    for col in range(7):
        new_board = [[" " for _ in range(7)] for _ in range(6)]
        for i in range(4):
            new_board[row + i][col] = 'X'
        display_board(new_board)

# Check diagonals (positive slope)
for row in range(3):
    for col in range(4):
        new_board = [[" " for _ in range(7)] for _ in range(6)]
        for i in range(4):
            new_board[row + i][col + i] = 'X'
        display_board(new_board)

# Check diagonals (negative slope)
for row in range(3):
    for col in range(3, 7):
        new_board = [[" " for _ in range(7)] for _ in range(6)]
        for i in range(4):
            new_board[row + i][col - i] = 'X'
        display_board(new_board)


–––––––––––––––––––––––––––––
| X | X | X | X |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––

–––––––––––––––––––––––––––––
|   | X | X | X | X |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––

–––––––––––––––––––––––––––––
|   |   | X | X | X | X |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
––––––––––––––––––––––

|   |   |   | X |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   | X |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   | X |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   | X |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   | X |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   | X |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   | X |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   | X |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   | X |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
––––––––––––––––––––––

|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   | X |   |   | 
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   | 
–––––––––––––––––––––––––––––
|   |   | X |   |   |   |   | 
–––––––––––––––––––––––––––––
|   | X |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   | X |   | 
–––––––––––––––––––––––––––––
|   |   |   |   | X |   |   | 
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   | 
–––––––––––––––––––––––––––––
|   |   | X |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   | X | 
–––––––––––––––––––––––––––––
|   |   |   |   |   | X |   | 
–––––––––––––––––––––––––––––
|   |   |   |   | X |  

In [63]:
connect4.board

[[' ', ' ', ' ', ' ', ' ', ' ', ' '],
 [' ', 'X', ' ', ' ', ' ', ' ', ' '],
 ['X', 'X', 'X', ' ', ' ', ' ', ' '],
 ['O', 'O', 'X', 'X', ' ', ' ', ' '],
 ['O', 'O', 'X', 'X', ' ', 'O', ' '],
 ['O', 'O', 'O', 'X', ' ', 'O', ' ']]

In [67]:
potential_wins_1 = 0
potential_wins_2 = 0
for row in range(6):
    for col in range(4):
        window = [connect4.board[row][col + i] for i in range(4)]
        if window.count('X') == 3 and " " in window:
            potential_wins_1 += 1
        if window.count('O') == 3 and " " in window:
            potential_wins_2 += 1
potential_wins_1, potential_wins_2

(1, 0)

In [69]:
potential_wins_1 = 0
potential_wins_2 = 0

# Check horizontal
for row in range(6):
    for col in range(4):
        window = [connect4.board[row][col + i] for i in range(4)]
        if window.count('X') == 3 and " " in window:
            print('H')
            potential_wins_1 += 1
        if window.count('O') == 3 and " " in window:
            potential_wins_2 += 1

# Check vertical
for row in range(3):
    for col in range(7):
        window = [connect4.board[row + i][col] for i in range(4)]
        if window.count('X') == 3 and " " in window:
            print('V')
            potential_wins_1 += 1
        if window.count('O') == 3 and " " in window:
            potential_wins_2 += 1

# Check diagonals (positive slope)
for row in range(3):
    for col in range(4):
        window = [connect4.board[row + i][col + i] for i in range(4)]
        if window.count('X') == 3 and " " in window:
            print('D+')
            potential_wins_1 += 1
        if window.count('O') == 3 and " " in window:
            potential_wins_2 += 1

# Check diagonals (negative slope)
for row in range(3):
    for col in range(3, 7):
        window = [connect4.board[row + i][col - i] for i in range(4)]
        if window.count('X') == 3 and " " in window:
            print('D-')
            potential_wins_1 += 1
        if window.count('O') == 3 and " " in window:
            potential_wins_2 += 1

potential_wins_1, potential_wins_2

H
V
V
D+
D+
D+
D+


(7, 0)

In [None]:
board = connect4.board

for row in range(6):
    for col in range(4):
        if all(self.board[row][col + i] == self.piece for i in range(4)):
            return True

In [32]:
connect4 = Connect4()
connect4.battle()


–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
Player 1, choose a column (0-6): 3

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   | X |   |   |   | 
–––––––––––––––––––––––––––––
Player 2, choose a column (0-6): 4

–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––––––––––––––––––
|   |   |   |   |   |   |   | 
–––––––––––––