In [1]:
import random
import numpy as np
import pandas as pd
from mancala import Mancala
from agents.human import Human
from agents.random_agent import RandomAgent
from agents.simple_agent import SimpleAgent
from agents.minimax_agent import MiniMaxAgent
from agents.negamax_agent import NegamaxAgent

In [16]:
import random

class QLearningAgent:
    def __init__(self, learning_rate=0.1, discount_factor=0.9, exploration_rate=1.0, exploration_decay=0.99):
        self.q_table = {}
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay

    def choose_move(self, game):
        state = self.generate_state(game)
        if random.random() < self.exploration_rate:
            move = random.choice(game.get_valid_moves())
        else:
            move = self.best_known_move(state, game.get_valid_moves())
        return move

    def learn(self, old_state, action, reward, new_state):
        # Calculate the max future value
        valid_moves = Mancala(list(new_state[0])).get_valid_moves()
        future_rewards = [self.q_table.get((new_state, a), 0) for a in valid_moves]
        max_future_reward = max(future_rewards) if future_rewards else 0

        # Q-learning update rule
        old_value = self.q_table.get((old_state, action), 0)        
        new_value = old_value + self.learning_rate * (reward + self.discount_factor * max_future_reward - old_value)
        self.q_table[(old_state, action)] = new_value

    def generate_state(self, game):
        # Convert the game board to a tuple and include the current player
        return (tuple(game.board), game.current_player)

    def best_known_move(self, state, valid_moves):
        best_move = None
        max_value = float('-inf')

        for move in valid_moves:
            value = self.q_table.get((state, move), 0)
            if value > max_value:
                max_value = value
                best_move = move

        # If all moves have the same value or state is new, choose randomly
        return best_move if best_move is not None else random.choice(valid_moves)

    def update_exploration_rate(self):
        self.exploration_rate *= self.exploration_decay
        
    def __str__(self):
        return "Q-Learning Agent"
    
    def __repr__(self):
        return "Q-Learning Agent"

# Additional methods and adjustments can be added as needed, especially to handle rewards and game-specific logic.

In [32]:
class MancalaQ(Mancala):
    def battle_q(self, q_agent, opponent_agent, q_agent_first=True, output=True):
        while not self.is_game_over():
            current_agent = q_agent if self.current_player == (0 if q_agent_first else 1) else opponent_agent
            is_q_agent_turn = isinstance(current_agent, QLearningAgent)

            # Get the current state before making a move
            old_state = q_agent.generate_state(self) if is_q_agent_turn else None

            # Agent makes a move
            pit_index = current_agent.choose_move(self)
            if output:
                print(f"{current_agent} chooses pit {pit_index}.")

            # Store the board before the move to calculate rewards
#             board_before_move = self.board.copy()
            self.make_move(pit_index)

            # Calculate reward and update Q-table if it's q_agent's turn
            if is_q_agent_turn:
                reward = self.calculate_reward(old_state, q_agent, opponent_agent)
                new_state = q_agent.generate_state(self)
                q_agent.learn(old_state, pit_index, reward, new_state)
                q_agent.update_exploration_rate()

            if output:
                self.display_board()

        # Assign endgame rewards based on q_agent's position
        self.assign_endgame_rewards(q_agent, opponent_agent, q_agent_first, output)

    def calculate_reward(self, old_state, q_agent, other_agent):
        # Implement the logic to calculate the intermediate rewards
        reward = 0
        
        current_agent = q_agent if self.current_player == (0 if q_agent_first else 1) else opponent_agent
        
#         old_board, old_player = old_state

#         # Assuming the Q-learning agent is player 0 and captures stones to store at index 6
#         player_store_index = 6 if q_agent.current_player == 0 else 13

#         # Reward for capturing stones: Difference in the store count
#         capture = False
#         indices = range(6) if q_agent.current_player == 1 else range(7,13)
#         for index in indices:
#             if board_before_move[index] > 0 and self.board[index] == 0:
#                 capture = True
#         if capture:
#             stones_captured = self.board[player_store_index] - board_before_move[player_store_index]
#             reward += stones_captured

        # Reward for gaining another turn
        if current_agent == q_agent and self.current_player == old_player:
            # Check if the last stone landed in the player's store          
#             if old_sa  # This method needs to be defined in your Mancala class
            reward += 5
        return reward  # Placeholder for reward calculation logic

    def assign_endgame_rewards(self, q_agent, opponent_agent, q_agent_first, output):
        player_store1, player_store2 = self.board[6], self.board[13]
        win_reward, lose_reward = 100, -100
        q_agent_wins = (player_store1 > player_store2) if q_agent_first else (player_store2 > player_store1)

        if q_agent_wins:
            final_reward = win_reward
            win_message = f"{q_agent} wins {player_store1}-{player_store2}" if q_agent_first else f"{q_agent} wins {player_store2}-{player_store1}"
        elif player_store1 == player_store2:
            final_reward = 0  # Reward for a draw
            win_message = "It's a tie!"
        else:
            final_reward = lose_reward
            win_message = f"{opponent_agent} wins {player_store2}-{player_store1}" if q_agent_first else f"{opponent_agent} wins {player_store1}-{player_store2}"

        if output:
            print(win_message)

#         # Update Q-table for the endgame state
#         old_state = q_agent.generate_state(self)
#         q_agent.learn(old_state, None, final_reward, None)  # Action is None because the game is over

# Note: You will still need to implement the reward calculation in the 'calculate_reward' method.

In [33]:
human = Human()
random_agent = RandomAgent()
simple_agent = SimpleAgent()
minimax_agent = MiniMaxAgent(4)
negamax_agent = NegamaxAgent(4)
q_learning_agent = QLearningAgent()

In [34]:
mancalaq = MancalaQ()

mancalaq.battle_q(q_learning_agent, simple_agent, q_agent_first=False, output=True)

Simple Agent chooses pit 2.
Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_0_]  [_5_]  [_5_]  [_5_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________________|
Simple Agent chooses pit 5.
Current Player: 2
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_0_]  [_5_]  [_5_]  [_0_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 2 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_5_]  [_5_]  [_5_]  [_5_]        |
|________________________

NameError: name 'q_agent_first' is not defined

In [23]:
q_learning_agent.q_table

{(((4, 4, 0, 5, 5, 0, 2, 5, 5, 5, 5, 4, 4, 0), 1), 9): 0.0,
 (((0, 5, 1, 6, 6, 0, 8, 0, 5, 0, 6, 5, 5, 1), 1), 10): 0.0,
 (((1, 6, 2, 6, 0, 1, 9, 1, 6, 1, 1, 6, 6, 2), 1), 9): 0.0,
 (((1, 6, 0, 7, 0, 0, 17, 1, 0, 0, 2, 6, 6, 2), 1), 12): 0.0,
 (((2, 7, 1, 8, 0, 0, 19, 0, 0, 0, 2, 6, 0, 3), 1), 10): 0.0,
 (((0, 7, 1, 0, 1, 1, 20, 1, 1, 1, 1, 8, 0, 6), 1), 9): 0.0,
 (((0, 7, 1, 0, 0, 0, 23, 0, 1, 0, 2, 8, 0, 6), 1), 11): 0.0,
 (((1, 8, 2, 1, 0, 1, 24, 0, 1, 0, 2, 0, 1, 7), 1), 12): 0.0,
 (((1, 8, 2, 1, 0, 1, 24, 0, 1, 0, 2, 0, 0, 8), 1), 8): 0.0,
 (((1, 8, 0, 1, 1, 0, 25, 0, 0, 0, 2, 0, 0, 10), 1), 10): 0.0,
 (((0, 8, 0, 1, 0, 1, 25, 0, 0, 0, 0, 1, 0, 12), 1), 11): 0.0,
 (((0, 8, 0, 0, 1, 0, 26, 0, 0, 0, 0, 0, 1, 12), 1), 12): 0.0,
 (((4, 4, 0, 5, 5, 0, 2, 5, 5, 5, 5, 4, 4, 0), 1), 8): 0.0,
 (((4, 4, 0, 5, 5, 0, 2, 5, 0, 6, 6, 5, 5, 1), 1), 12): 0.0,
 (((0, 1, 3, 8, 7, 0, 10, 0, 0, 6, 6, 5, 0, 2), 1), 9): 0.0,
 (((1, 2, 3, 8, 0, 1, 11, 1, 1, 1, 8, 7, 1, 3), 1), 7): 0.0,
 (((1, 2, 3, 0, 1

In [24]:
q_learning_agent.exploration_rate

0.7105532272722921

In [3]:
mancala_game = Mancala()
agent1 = human
agent2 = negamax_agent
mancala_game.battle(agent1, agent2)

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 0 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________________|
Player 1, choose a pit (0-5): 3
Human chooses pit 3.

Current Player: 2
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_4_]  [_0_]  [_5_]  [_5_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_5_]        |
|__________________________

KeyboardInterrupt: Interrupted by user

In [7]:
class QLearningAgent:
    def __init__(self, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.1):
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.q_table = {}

    def get_q_value(self, state, action):
        return self.q_table.get((state, action), 0.0)

    def choose_move(self, mancala_game): # state, valid_moves):
        next_state = tuple(mancala_game.board)
        valid_moves = mancala_game.get_valid_moves()
        
        self.update_q_value()
                    
                    
    def choose_move(self, state, valid_moves):            
        if np.random.rand() < self.exploration_rate:
            return np.random.choice(valid_moves)
        else:
            q_values = [self.get_q_value(state, move) for move in valid_moves]
            return valid_moves[np.argmax(q_values)]

    def update_q_value(self, state, action, reward, next_state):
        current_q = self.get_q_value(state, action)
        max_future_q = max([self.get_q_value(next_state, next_action) for next_action in range(6)])
        new_q = (1 - self.learning_rate) * current_q + self.learning_rate * (reward + self.discount_factor * max_future_q)
        self.q_table[(state, action)] = new_q

In [None]:
def q_learning_battle(agent1, agent2):
    mancala_game = Mancala()

    while True:
        mancala_game.display_board()

        try:
            if mancala_game.current_player == 0:
                pit_index = -1
                
                valid_moves = mancala_game.valid_moves()
                if agent1 == QLearningAgent(state, valid_mvoes):
                    
                pit_index = agent1.choose_move(mancala_game)
                print(f"{agent1} chooses pit {pit_index}.")
            else:
                pit_index = agent2.choose_move(mancala_game)
                print(f"{agent2} chooses pit {pit_index}.")
            print()
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue

        mancala_game.make_move(pit_index)

        # Check for game over
        if sum(mancala_game.board[0:6]) == 0 or sum(mancala_game.board[7:13]) == 0:
            mancala_game.display_board()
            print("Game over!")

            player_store1 = mancala_game.board[6]
            player_store2 = mancala_game.board[13]

            if player_store1 > player_store2:
                print(f'{agent1} wins {player_store1}-{player_store2}')
            elif player_store1 < player_store2:
                print(f'{agent2} wins {player_store2}-{player_store1}')
            else:
                print('Tie!')
                
            return player_store1, player_store2

class MiniMaxAgent:
    def __init__(self, depth):
        self.depth = depth
        self.lookup = {}
        
    def __str__(self):
        return f"MiniMax Agent {self.depth}"
    
    def __repr__(self):
        return f"MiniMax Agent {self.depth}"

    # A simple evaluation function that computes the difference in scores
    def evaluate_board(self, board, player):
        player_store = 6 if player == 0 else 13 # player_store
        opponent_store = 13 if player == 0 else 6 # opponent_store
        # print('-- Player Store:', board[player_store], ', Opponent Store:', board[opponent_store])
        return board[player_store] - board[opponent_store]

    def minimax(self, node, depth, alpha, beta, maximizing_player):
        print(depth, maximizing_player)
        if depth == 0 or not node.get_valid_moves():
            player = node.current_player if maximizing_player else 1 - node.current_player
            return self.evaluate_board(node.board, player)

        if maximizing_player:
            max_eval = float('-inf')
            for move in node.get_valid_moves():
                child_node = Mancala(node.board, node.current_player)
                child_node.make_move(move)
                if child_node.current_player != node.current_player:
                    depth -= 1
                    maximizing_player = False
                eval = self.minimax(child_node, depth, alpha, beta, maximizing_player)
                max_eval = max(max_eval, eval)
                alpha = max(alpha, eval)
                if beta <= alpha: # Beta cut-off
                    break
            return max_eval
        else:
            min_eval = float('inf')
            for move in node.get_valid_moves():
                child_node = Mancala(node.board, node.current_player)
                child_node.make_move(move)
                if child_node.current_player != node.current_player:
                    depth -= 1
                    maximizing_player = True
                eval = self.minimax(child_node, depth, alpha, beta, maximizing_player)
                min_eval = min(min_eval, eval)
                beta = min(beta, eval)
                if beta <= alpha: # Alpha cut-off
                    break
            return min_eval

    def choose_move(self, node):
        valid_moves = node.get_valid_moves()
        move_evals = {}
        for move in valid_moves:
            child_node = Mancala(node.board, node.current_player)
            child_node.make_move(move)
            maximizing_player = True
            depth = self.depth
            if child_node.current_player != node.current_player:
                depth -= 1
                maximizing_player = False
            eval = self.minimax(child_node, depth, float('-inf'), float('inf'), maximizing_player)
            move_evals[move] = eval
            print(move, eval)
            
        max_eval = max(move_evals.values())
        max_moves = [move for move, eval in move_evals.items() if eval == max_eval]
        best_move = random.choice(max_moves) # max_moves[0]
        return best_move

In [95]:
## human = Human()
random_agent = RandomAgent()
simple_agent = SimpleAgent()
minimax_agent = MiniMaxAgent(6)

agent1 = human
agent2 = minimax_agent
battle(agent1, agent2)

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 0 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________________|
Player 1, choose a pit (0-5): 2
Human chooses pit 2.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_0_]  [_5_]  [_5_]  [_5_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|__________________________

Player 1, choose a pit (0-5): 
Invalid input. Please enter a number.
Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_6_]  [_1_]  [_0_]  [_1_]  [_11_]  [_0_]   ___  |
| |   |                                            |   | |
| | 3 |                                            | 10 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_8_]  [_0_]  [_0_]  [_8_]  [_0_]  [_0_]        |
|________________________________________________________|
Player 1, choose a pit (0-5): 0
Human chooses pit 0.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_0_]  [_2_]  [_1_]  [_2_]  [_12_]  [_1_]   ___  |
| |   |                                            |   | |
| | 3 |                                            | 11 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_8_

KeyboardInterrupt: 

In [82]:
human = Human()
random_agent = RandomAgent()
simple_agent = SimpleAgent()
minimax_agent_6 = MiniMaxAgent(6)
minimax_agent_8 = MiniMaxAgent(9)

agent1 = minimax_agent_6
agent2 = minimax_agent_8
battle(agent1, agent2)

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 0 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________________|
0 -5
1 -6
2 4
3 -2
4 -4
5 4
MiniMax Agent 6 chooses pit 2.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_0_]  [_5_]  [_5_]  [_5_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|____________________

9 -9
10 8
12 -9
MiniMax Agent 9 chooses pit 10.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_0_]  [_0_]  [_0_]  [_9_]  [_0_]  [_1_]   ___  |
| |   |                                            |   | |
| | 4 |                                            | 16 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_10_]  [_1_]  [_0_]  [_7_]  [_0_]  [_0_]        |
|________________________________________________________|
3 -8
5 -8
MiniMax Agent 6 chooses pit 3.

Current Player: 2
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_0_]  [_0_]  [_0_]  [_0_]  [_1_]  [_2_]   ___  |
| |   |                                            |   | |
| | 4 |                                            | 17 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_11_]  [_2_]  [_1_]  [_8_]  [_1_]  [

(19, 29)

In [38]:
human = Human()
random_agent = RandomAgent()
simple_agent = SimpleAgent()
minimax_agent = MiniMaxAgent(6)

agent1 = minimax_agent
agent2 = human
battle(agent1, agent2)

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 0 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________________|
0 -5
1 -6
2 4
3 -2
4 -4
5 4
2 4
MiniMax Agent 6 chooses pit 2.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_0_]  [_5_]  [_5_]  [_5_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________

KeyboardInterrupt: Interrupted by user

In [15]:
human = Human()
random_agent = RandomAgent()
simple_agent = SimpleAgent()
minimax_agent_0 = MiniMaxAgent(0)
minimax_agent_1 = MiniMaxAgent(1)
minimax_agent_2 = MiniMaxAgent(2)
minimax_agent_3 = MiniMaxAgent(3)
minimax_agent_4 = MiniMaxAgent(4)
minimax_agent_5 = MiniMaxAgent(5)
minimax_agent_6 = MiniMaxAgent(6)
minimax_agent_7 = MiniMaxAgent(7)

agents = [
    random_agent,
    simple_agent,
#     minimax_agent_0,
    minimax_agent_1,
    minimax_agent_2,
    minimax_agent_3,
    minimax_agent_4,
    minimax_agent_5,
    minimax_agent_6,
#     minimax_agent_7
]

for agent in agents:
    print(agent)

Random Agent
Simple Agent
MiniMax Agent 1
MiniMax Agent 2
MiniMax Agent 3
MiniMax Agent 4
MiniMax Agent 5
MiniMax Agent 6


In [555]:
games = []
for agent1 in agents:
    for agent2 in agents:
        if agent1 == agent2:
            continue
        game = (agent1, agent2)
        games.append(game)
random.shuffle(games)
games

[(MiniMax Agent 1, MiniMax Agent 6),
 (MiniMax Agent 1, MiniMax Agent 5),
 (MiniMax Agent 2, MiniMax Agent 5),
 (MiniMax Agent 4, MiniMax Agent 1),
 (MiniMax Agent 3, MiniMax Agent 5),
 (Random Agent, MiniMax Agent 2),
 (MiniMax Agent 6, MiniMax Agent 2),
 (MiniMax Agent 6, Random Agent),
 (Simple Agent, MiniMax Agent 1),
 (MiniMax Agent 6, MiniMax Agent 5),
 (MiniMax Agent 3, MiniMax Agent 4),
 (MiniMax Agent 1, Random Agent),
 (Random Agent, MiniMax Agent 6),
 (Random Agent, MiniMax Agent 3),
 (MiniMax Agent 3, MiniMax Agent 6),
 (MiniMax Agent 5, Simple Agent),
 (MiniMax Agent 6, MiniMax Agent 3),
 (MiniMax Agent 6, Simple Agent),
 (MiniMax Agent 1, MiniMax Agent 4),
 (MiniMax Agent 4, MiniMax Agent 2),
 (MiniMax Agent 2, Simple Agent),
 (Random Agent, Simple Agent),
 (MiniMax Agent 3, MiniMax Agent 2),
 (MiniMax Agent 5, MiniMax Agent 1),
 (MiniMax Agent 3, Simple Agent),
 (MiniMax Agent 4, MiniMax Agent 3),
 (MiniMax Agent 2, MiniMax Agent 4),
 (Simple Agent, MiniMax Agent 6),
 (M

In [14]:
[(agent1, agent2) for agent1 in agents for agent2 in agents if agent1 != agent2]

[(Random Agent, Simple Agent),
 (Random Agent, MiniMax Agent 1),
 (Random Agent, MiniMax Agent 2),
 (Random Agent, MiniMax Agent 3),
 (Random Agent, MiniMax Agent 4),
 (Random Agent, MiniMax Agent 5),
 (Random Agent, MiniMax Agent 6),
 (Simple Agent, Random Agent),
 (Simple Agent, MiniMax Agent 1),
 (Simple Agent, MiniMax Agent 2),
 (Simple Agent, MiniMax Agent 3),
 (Simple Agent, MiniMax Agent 4),
 (Simple Agent, MiniMax Agent 5),
 (Simple Agent, MiniMax Agent 6),
 (MiniMax Agent 1, Random Agent),
 (MiniMax Agent 1, Simple Agent),
 (MiniMax Agent 1, MiniMax Agent 2),
 (MiniMax Agent 1, MiniMax Agent 3),
 (MiniMax Agent 1, MiniMax Agent 4),
 (MiniMax Agent 1, MiniMax Agent 5),
 (MiniMax Agent 1, MiniMax Agent 6),
 (MiniMax Agent 2, Random Agent),
 (MiniMax Agent 2, Simple Agent),
 (MiniMax Agent 2, MiniMax Agent 1),
 (MiniMax Agent 2, MiniMax Agent 3),
 (MiniMax Agent 2, MiniMax Agent 4),
 (MiniMax Agent 2, MiniMax Agent 5),
 (MiniMax Agent 2, MiniMax Agent 6),
 (MiniMax Agent 3, Rando

In [556]:
outcomes = {}
for num, game in enumerate(games):
    print(num, game)
    outcome = battle(*game, False)
    outcomes[game] = outcome
outcomes

0 (MiniMax Agent 1, MiniMax Agent 6)
MiniMax Agent 6 wins 32-16
1 (MiniMax Agent 1, MiniMax Agent 5)
MiniMax Agent 5 wins 28-20
2 (MiniMax Agent 2, MiniMax Agent 5)
MiniMax Agent 5 wins 25-23
3 (MiniMax Agent 4, MiniMax Agent 1)
MiniMax Agent 4 wins 34-14
4 (MiniMax Agent 3, MiniMax Agent 5)
MiniMax Agent 5 wins 27-21
5 (Random Agent, MiniMax Agent 2)
MiniMax Agent 2 wins 38-10
6 (MiniMax Agent 6, MiniMax Agent 2)
MiniMax Agent 6 wins 34-14
7 (MiniMax Agent 6, Random Agent)
MiniMax Agent 6 wins 39-9
8 (Simple Agent, MiniMax Agent 1)
Simple Agent wins 31-17
9 (MiniMax Agent 6, MiniMax Agent 5)
MiniMax Agent 6 wins 34-14
10 (MiniMax Agent 3, MiniMax Agent 4)
MiniMax Agent 3 wins 34-14
11 (MiniMax Agent 1, Random Agent)
MiniMax Agent 1 wins 39-9
12 (Random Agent, MiniMax Agent 6)
MiniMax Agent 6 wins 40-8
13 (Random Agent, MiniMax Agent 3)
MiniMax Agent 3 wins 40-8
14 (MiniMax Agent 3, MiniMax Agent 6)
MiniMax Agent 3 wins 28-20
15 (MiniMax Agent 5, Simple Agent)
MiniMax Agent 5 wins 27-2

{(MiniMax Agent 1, MiniMax Agent 6): (16, 32),
 (MiniMax Agent 1, MiniMax Agent 5): (20, 28),
 (MiniMax Agent 2, MiniMax Agent 5): (23, 25),
 (MiniMax Agent 4, MiniMax Agent 1): (34, 14),
 (MiniMax Agent 3, MiniMax Agent 5): (21, 27),
 (Random Agent, MiniMax Agent 2): (10, 38),
 (MiniMax Agent 6, MiniMax Agent 2): (34, 14),
 (MiniMax Agent 6, Random Agent): (39, 9),
 (Simple Agent, MiniMax Agent 1): (31, 17),
 (MiniMax Agent 6, MiniMax Agent 5): (34, 14),
 (MiniMax Agent 3, MiniMax Agent 4): (34, 14),
 (MiniMax Agent 1, Random Agent): (39, 9),
 (Random Agent, MiniMax Agent 6): (8, 40),
 (Random Agent, MiniMax Agent 3): (8, 40),
 (MiniMax Agent 3, MiniMax Agent 6): (28, 20),
 (MiniMax Agent 5, Simple Agent): (27, 21),
 (MiniMax Agent 6, MiniMax Agent 3): (35, 13),
 (MiniMax Agent 6, Simple Agent): (34, 14),
 (MiniMax Agent 1, MiniMax Agent 4): (19, 29),
 (MiniMax Agent 4, MiniMax Agent 2): (33, 15),
 (MiniMax Agent 2, Simple Agent): (31, 17),
 (Random Agent, Simple Agent): (9, 39),
 (Mi

In [581]:
rows = []
for game, score in outcomes.items():
    agent1, agent2 = game
    score1, score2 = score
    row = {'Agent 1': agent1, 'Score 1': score1, 'Agent 2': agent2, 'Score 2': score2}
    rows.append(row)
df_outcomes = pd.DataFrame(rows)
df_outcomes

Unnamed: 0,Agent 1,Score 1,Agent 2,Score 2
0,MiniMax Agent 1,16,MiniMax Agent 6,32
1,MiniMax Agent 1,20,MiniMax Agent 5,28
2,MiniMax Agent 2,23,MiniMax Agent 5,25
3,MiniMax Agent 4,34,MiniMax Agent 1,14
4,MiniMax Agent 3,21,MiniMax Agent 5,27
5,Random Agent,10,MiniMax Agent 2,38
6,MiniMax Agent 6,34,MiniMax Agent 2,14
7,MiniMax Agent 6,39,Random Agent,9
8,Simple Agent,31,MiniMax Agent 1,17
9,MiniMax Agent 6,34,MiniMax Agent 5,14


In [557]:
standings = {agent: {'W':0, 'L':0, 'T':0, 'PF': 0, 'PA': 0} for agent in agents}
standings

{Random Agent: {'W': 0, 'L': 0, 'T': 0, 'PF': 0, 'PA': 0},
 Simple Agent: {'W': 0, 'L': 0, 'T': 0, 'PF': 0, 'PA': 0},
 MiniMax Agent 1: {'W': 0, 'L': 0, 'T': 0, 'PF': 0, 'PA': 0},
 MiniMax Agent 2: {'W': 0, 'L': 0, 'T': 0, 'PF': 0, 'PA': 0},
 MiniMax Agent 3: {'W': 0, 'L': 0, 'T': 0, 'PF': 0, 'PA': 0},
 MiniMax Agent 4: {'W': 0, 'L': 0, 'T': 0, 'PF': 0, 'PA': 0},
 MiniMax Agent 5: {'W': 0, 'L': 0, 'T': 0, 'PF': 0, 'PA': 0},
 MiniMax Agent 6: {'W': 0, 'L': 0, 'T': 0, 'PF': 0, 'PA': 0}}

In [564]:
for game, score in outcomes.items():
    agent1, agent2 = game
    score1, score2 = score
    
    if score1 > score2:
        standings[agent1]['W'] += 1
        standings[agent2]['L'] += 1
    elif score1 < score2:
        standings[agent1]['L'] += 1
        standings[agent2]['W'] += 1
    else:
        standings[agent1]['T'] += 1
        standings[agent2]['T'] += 1    
    standings[agent1]['PF'] += score1
    standings[agent2]['PF'] += score2
    standings[agent1]['PA'] += score2
    standings[agent2]['PA'] += score1
standings

{Random Agent: {'W': 0, 'L': 14, 'T': 0, 'PF': 132, 'PA': 540},
 Simple Agent: {'W': 5, 'L': 8, 'T': 1, 'PF': 326, 'PA': 346},
 MiniMax Agent 1: {'W': 2, 'L': 12, 'T': 0, 'PF': 274, 'PA': 398},
 MiniMax Agent 2: {'W': 7, 'L': 7, 'T': 0, 'PF': 330, 'PA': 342},
 MiniMax Agent 3: {'W': 9, 'L': 5, 'T': 0, 'PF': 386, 'PA': 286},
 MiniMax Agent 4: {'W': 11, 'L': 3, 'T': 0, 'PF': 388, 'PA': 284},
 MiniMax Agent 5: {'W': 10, 'L': 3, 'T': 1, 'PF': 411, 'PA': 261},
 MiniMax Agent 6: {'W': 11, 'L': 3, 'T': 0, 'PF': 441, 'PA': 231}}

In [577]:
df = pd.DataFrame(standings).T.sort_values(['W', 'PF'], ascending=False)
df

Unnamed: 0,W,L,T,PF,PA
MiniMax Agent 6,11,3,0,441,231
MiniMax Agent 4,11,3,0,388,284
MiniMax Agent 5,10,3,1,411,261
MiniMax Agent 3,9,5,0,386,286
MiniMax Agent 2,7,7,0,330,342
Simple Agent,5,8,1,326,346
MiniMax Agent 1,2,12,0,274,398
Random Agent,0,14,0,132,540


In [594]:
def simulate_playoff_round(agent1, agent2):
    wins1 = 0
    wins2 = 0
    for n in range(1,8):
        print(f'---GAME {n}---')
        outcome = battle(agent2, agent1, False) if n >= 3 and n <= 5 else battle(agent1, agent2, False)
        game, score = outcome
        score1, score2 = outcome
        if score1 > score2:
            if n >= 3 and n <= 5:
                wins2 += 1
            else:
                wins1 += 1
        elif score1 < score2:
            if n >= 3 and n <= 5:
                wins1 += 1
            else:
                wins2 += 1
    return wins1, wins2

In [595]:
simulate_playoff_round(minimax_agent_6, random_agent)

---GAME 1---
MiniMax Agent 6 wins 35-13
---GAME 2---
MiniMax Agent 6 wins 44-4
---GAME 3---
MiniMax Agent 6 wins 39-9
---GAME 4---
MiniMax Agent 6 wins 41-7
---GAME 5---
MiniMax Agent 6 wins 45-3
---GAME 6---
MiniMax Agent 6 wins 43-5
---GAME 7---
MiniMax Agent 6 wins 40-8


(7, 0)

In [596]:
simulate_playoff_round(minimax_agent_4, minimax_agent_1)

---GAME 1---
MiniMax Agent 4 wins 34-14
---GAME 2---
MiniMax Agent 4 wins 34-14
---GAME 3---
MiniMax Agent 4 wins 29-19
---GAME 4---
MiniMax Agent 4 wins 29-19
---GAME 5---
MiniMax Agent 4 wins 29-19
---GAME 6---
MiniMax Agent 4 wins 34-14
---GAME 7---
MiniMax Agent 4 wins 34-14


(7, 0)

In [597]:
simulate_playoff_round(minimax_agent_5, simple_agent)

---GAME 1---
MiniMax Agent 5 wins 27-21
---GAME 2---
MiniMax Agent 5 wins 27-21
---GAME 3---
Tie!
---GAME 4---
Tie!
---GAME 5---
Tie!
---GAME 6---
MiniMax Agent 5 wins 27-21
---GAME 7---
MiniMax Agent 5 wins 27-21


(4, 0)

In [598]:
simulate_playoff_round(minimax_agent_3, minimax_agent_2)

---GAME 1---
MiniMax Agent 3 wins 31-17
---GAME 2---
MiniMax Agent 3 wins 31-17
---GAME 3---
MiniMax Agent 2 wins 28-20
---GAME 4---
MiniMax Agent 2 wins 28-20
---GAME 5---
MiniMax Agent 2 wins 28-20
---GAME 6---
MiniMax Agent 3 wins 31-17
---GAME 7---
MiniMax Agent 3 wins 31-17


(4, 3)

In [599]:
simulate_playoff_round(minimax_agent_6, minimax_agent_3)

---GAME 1---
MiniMax Agent 6 wins 35-13
---GAME 2---
MiniMax Agent 6 wins 35-13
---GAME 3---
MiniMax Agent 3 wins 28-20
---GAME 4---
MiniMax Agent 3 wins 28-20
---GAME 5---
MiniMax Agent 3 wins 28-20
---GAME 6---
MiniMax Agent 6 wins 35-13
---GAME 7---
MiniMax Agent 6 wins 35-13


(4, 3)

In [600]:
simulate_playoff_round(minimax_agent_4, minimax_agent_5)

---GAME 1---
MiniMax Agent 4 wins 26-22
---GAME 2---
MiniMax Agent 4 wins 26-22
---GAME 3---
MiniMax Agent 5 wins 37-11
---GAME 4---
MiniMax Agent 5 wins 37-11
---GAME 5---
MiniMax Agent 5 wins 37-11
---GAME 6---
MiniMax Agent 4 wins 26-22
---GAME 7---
MiniMax Agent 4 wins 26-22


(4, 3)

In [601]:
simulate_playoff_round(minimax_agent_6, minimax_agent_4)

---GAME 1---
MiniMax Agent 6 wins 35-13
---GAME 2---
MiniMax Agent 6 wins 35-13
---GAME 3---
MiniMax Agent 4 wins 27-21
---GAME 4---
MiniMax Agent 4 wins 27-21
---GAME 5---
MiniMax Agent 4 wins 27-21
---GAME 6---
MiniMax Agent 6 wins 35-13
---GAME 7---
MiniMax Agent 6 wins 35-13


(4, 3)

In [613]:
battle(human, minimax_agent_6)

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 0 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________________|
Player 1, choose a pit (0-5): 2
Human chooses pit 2.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_0_]  [_5_]  [_5_]  [_5_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|__________________________

Player 1, choose a pit (0-5): 1
Human chooses pit 1.

Current Player: 2
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_5_]  [_0_]  [_0_]  [_2_]  [_0_]  [_0_]   ___  |
| |   |                                            |   | |
| | 4 |                                            | 18 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_8_]  [_9_]  [_0_]  [_2_]  [_0_]  [_0_]        |
|________________________________________________________|
MiniMax Agent 6 chooses pit 9.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_5_]  [_0_]  [_0_]  [_2_]  [_0_]  [_0_]   ___  |
| |   |                                            |   | |
| | 4 |                                            | 18 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_8_]  [_10_]  [_1_]  [_0_]  [_0_]  [_0_]  

Player 1, choose a pit (0-5): 2
Human chooses pit 2.

Current Player: 2
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_1_]  [_1_]  [_0_]  [_0_]  [_0_]  [_8_]   ___  |
| |   |                                            |   | |
| | 13 |                                            | 23 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_0_]  [_0_]  [_0_]  [_0_]  [_2_]  [_0_]        |
|________________________________________________________|
MiniMax Agent 6 chooses pit 8.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_1_]  [_1_]  [_0_]  [_0_]  [_0_]  [_8_]   ___  |
| |   |                                            |   | |
| | 13 |                                            | 23 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_0_]  [_0_]  [_1_]  [_1_]  [_0_]  [_0_] 

(33, 15)

In [612]:
battle(minimax_agent_6, human)

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 0 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________________|
MiniMax Agent 6 chooses pit 2.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_0_]  [_5_]  [_5_]  [_5_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________

KeyboardInterrupt: Interrupted by user

In [456]:
human = Human()
random_agent = RandomAgent()
simple_agent = SimpleAgent()
minimax_agent = MiniMaxAgent(2)

agent1 = simple_agent
agent2 = minimax_agent
battle(agent1, agent2)

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 0 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________________|
I'm getting another turn hehe
Simple Agent chooses pit 2.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_0_]  [_5_]  [_5_]  [_5_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|_____________________

(21, 27)

In [472]:


def play_game(mancala, num_episodes=2):
    for episode in range(num_episodes):
        state = tuple(mancala.board)
        while not mancala.is_game_over():
            mancala.display_board()
            valid_moves = mancala.get_valid_moves()

            # Player's turn
            pit_index = int(input(f"Player {mancala.current_player + 1}, choose a pit (0-5): "))
            if not mancala.make_move(pit_index):
                continue

            next_state = tuple(mancala.board)
            reward = mancala.get_reward()

            # Update Q-value
            mancala.q_learning_agent.update_q_value(state, pit_index, reward, next_state)

            state = next_state

            # Check for game over
            if mancala.is_game_over():
                break

            # Agent's turn
            pit_index = mancala.q_learning_agent.choose_move(state, valid_moves)
            print(f"Agent chooses pit {pit_index}.")
            self.make_move(pit_index)

        print(f"Episode {episode + 1} completed.")

In [432]:
class QLearningAgent:
    def __init__(self, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.1):
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.q_table = {}

    def get_q_value(self, state, action):
        return self.q_table.get((state, action), 0.0)

    def choose_move(self, mancala_game): # state, valid_moves):
        next_state = tuple(mancala_game.board)
        valid_moves = mancala_game.get_valid_moves()
        
        self.update_q_value()
                    
                    
    def choose_move(self, state, valid_moves):            
        if np.random.rand() < self.exploration_rate:
            return np.random.choice(valid_moves)
        else:
            q_values = [self.get_q_value(state, move) for move in valid_moves]
            return valid_moves[np.argmax(q_values)]

    def update_q_value(self, state, action, reward, next_state):
        current_q = self.get_q_value(state, action)
        max_future_q = max([self.get_q_value(next_state, next_action) for next_action in range(6)])
        new_q = (1 - self.learning_rate) * current_q + self.learning_rate * (reward + self.discount_factor * max_future_q)
        self.q_table[(state, action)] = new_q

In [435]:
q_learning_agent = QLearningAgent()

In [436]:
battle(simple_agent, q_learning_agent)

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 0 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|________________________________________________________|
I'm getting another turn hehe
Simple Agent chooses pit 2.

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_4_]  [_4_]  [_0_]  [_5_]  [_5_]  [_5_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_4_]  [_4_]  [_4_]  [_4_]  [_4_]  [_4_]        |
|_____________________

TypeError: choose_move() missing 1 required positional argument: 'valid_moves'

In [None]:
def q_learning_battle(agent1, agent2):
    mancala_game = Mancala()

    while True:
        mancala_game.display_board()

        try:
            if mancala_game.current_player == 0:
                pit_index = -1
                
                valid_moves = mancala_game.valid_moves()
                if agent1 == QLearningAgent(state, valid_mvoes):
                    
                pit_index = agent1.choose_move(mancala_game)
                print(f"{agent1} chooses pit {pit_index}.")
            else:
                pit_index = agent2.choose_move(mancala_game)
                print(f"{agent2} chooses pit {pit_index}.")
            print()
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue

        mancala_game.make_move(pit_index)

        # Check for game over
        if sum(mancala_game.board[0:6]) == 0 or sum(mancala_game.board[7:13]) == 0:
            mancala_game.display_board()
            print("Game over!")

            player_store1 = mancala_game.board[6]
            player_store2 = mancala_game.board[13]

            if player_store1 > player_store2:
                print(f'{agent1} wins {player_store1}-{player_store2}')
            elif player_store1 < player_store2:
                print(f'{agent2} wins {player_store2}-{player_store1}')
            else:
                print('Tie!')
                
            return player_store1, player_store2

In [114]:
# Example of using the Mancala class
mancala_game = Mancala()

while True:
    mancala_game.display_board()

    try:    
#         pit_index = int(input(f"Player {mancala_game.current_player + 1}, choose a pit (0-5): "))
        message = "Player 1, choose a pit (0-5): "
        if mancala_game.current_player == 1:
            message = "Player 2, choose a pit (7-12): "
        pit_index = int(input(message))
        print()
    except ValueError:
        print("Invalid input. Please enter a number.")
        continue
        
    if not mancala_game.make_move(pit_index):
        print('YEE')
        continue
        
    # Check for game over
    if sum(mancala_game.board[0:6]) == 0 or sum(mancala_game.board[7:13]) == 0:
        mancala_game.board[6] += sum(mancala_game.board[0:6])
        mancala_game.board[0:6] = [0,0,0,0,0,0]

        mancala_game.board[13] += sum(mancala_game.board[7:13])
        mancala_game.board[7:13] = [0,0,0,0,0,0]

        mancala_game.display_board()
        print("Game over!")
        break

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_1_]  [_1_]  [_1_]  [_1_]  [_1_]  [_1_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 0 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_1_]  [_1_]  [_1_]  [_1_]  [_1_]  [_1_]        |
|________________________________________________________|
Player 1, choose a pit (0-5): 5

Current Player: 1
 ________________________________________________________
|         ___    ___    ___    ___    ___    ___         |
|  ___   [_1_]  [_1_]  [_1_]  [_1_]  [_1_]  [_0_]   ___  |
| |   |                                            |   | |
| | 0 |                                            | 1 | |
| |___|   ___    ___    ___    ___    ___    ___   |___| |
|        [_1_]  [_1_]  [_1_]  [_1_]  [_1_]  [_1_]        |
|_______________________________________________