In [1]:
from games.tictactoe.tictactoe import TicTacToe
from base.agent import Agent
from agents.agent_random import RandomAgent
from agents.mcts import MonteCarloTreeSearch
from agents.minimax import MiniMax
import numpy as np
from collections import defaultdict

In [2]:
# Single game instance shared by `play` and by every agent constructed in the cells below.
# NOTE(review): an empty render_mode presumably turns rendering off — confirm against TicTacToe.
game = TicTacToe(render_mode="")

In [3]:
def play(agent1: Agent, agent2: Agent, num_games: int):
    """Play ``num_games`` episodes between two agents and print a reward summary.

    The episodes are run on the module-level ``game`` object, which is reset
    before each episode and stepped until ``game.terminated()`` is true.

    Args:
        agent1: One player; its ``agent`` attribute is the id it plays as.
        agent2: The other player; same convention as ``agent1``.
        num_games: Number of episodes to play. Zero or a negative value plays
            no episodes and reports an average of ``nan``.

    Raises:
        ValueError: If an id in ``game.agents`` has no matching player, e.g.
            because both agents were constructed with the same id.
    """
    players = {agent1.agent: agent1, agent2.agent: agent2}

    # Fail fast with a readable message. Without this check a missing seat
    # only surfaces later as a bare KeyError from one of the lookups below.
    unassigned = [agent_id for agent_id in game.agents if agent_id not in players]
    if unassigned:
        raise ValueError(
            f"No player assigned to game agent(s) {unassigned}; "
            f"got players for {list(players)}"
        )

    rewards = {agent_id: [] for agent_id in players}

    for _ in range(num_games):
        game.reset()

        while not game.terminated():
            agent = game.agent_selection
            action = players[agent].action()
            game.step(action)

        # Record the final reward of every agent for this episode.
        for agent in game.agents:
            rewards[agent].append(game.reward(agent))

    for agent in game.agents:
        player_name = players[agent].__class__.__name__
        history = rewards[agent]
        # np.mean([]) emits a RuntimeWarning; report nan directly when no games were played.
        average = np.mean(history) if history else float("nan")
        print(f"Agent {agent} ({player_name}) average reward: {average} over {num_games} games")
        print(f"Agent {agent} ({player_name}) rewards: {history}")

### Random vs Minimax

In [18]:
# RandomAgent takes the first seat, MiniMax (depth=4) the second; 10 games.
play(
    agent1=RandomAgent(game=game, agent=game.agents[0]),
    agent2=MiniMax(game=game, agent=game.agents[1], depth=4),
    num_games=10,
)

Agent X (RandomAgent) average reward: -0.6 over 10 games
Agent X (RandomAgent) rewards: [0, -1, -1, -1, -1, -1, 0, -1, -1, 1]
Agent O (MiniMax) average reward: 0.6 over 10 games
Agent O (MiniMax) rewards: [0, 1, 1, 1, 1, 1, 0, 1, 1, -1]


### Random vs MCTS

In [4]:
# RandomAgent takes the first seat, MonteCarloTreeSearch the second; 10 games.
play(
    agent1=RandomAgent(game=game, agent=game.agents[0]),
    agent2=MonteCarloTreeSearch(
        game=game,
        agent=game.agents[1],
        simulations=100,
        rollouts=5,
        max_depth=4,
    ),
    num_games=10,
)

Agent X (RandomAgent) average reward: -1.0 over 10 games
Agent X (RandomAgent) rewards: [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
Agent O (MonteCarloTreeSearch) average reward: 1.0 over 10 games
Agent O (MonteCarloTreeSearch) rewards: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
