# Naughts and Crosses, the cooperative way

In [1]:
from naughtsandcrosses import *
from mcts import MCTS

from IPython.display import clear_output
from time import sleep

## Sanity check

It's probably a good idea to check if everything works as expected.

In [2]:
def play_one_game(s, p1, p2, show=True, delay=3):
    """Play a game to completion, alternating between two searchers.

    Args:
        s: Starting game state. It must provide ``is_terminal()``,
            ``take_action(action)`` and a ``currentPlayer`` attribute.
        p1: Searcher consulted when ``s.currentPlayer == 1``; it must
            provide ``search(initial_state=...)``.
        p2: Searcher consulted for any other ``currentPlayer`` value.
        show: When true, print the state before and after every move,
            pause, and then clear the cell output.
        delay: Seconds to pause after each move when ``show`` is true.
            Defaults to 3, the pause that used to be hard-coded.

    Returns:
        The terminal state reached at the end of the game (``s`` itself
        if it is already terminal).
    """
    while not s.is_terminal():
        if show:
            print(s)

        # Whoever's turn it is picks the action; the state is replaced by
        # whatever take_action() returns.
        mover = p1 if s.currentPlayer == 1 else p2
        s = s.take_action(mover.search(initial_state=s))

        if show:
            print(s)
            sleep(delay)  # leave time to see the move before it is wiped
            clear_output()
    return s

In [3]:
# A new game; start with the 'X' player (i.e., 1).
state = NaughtsAndCrossesState()

# Two searchers that differ only in their iteration limit.
strong_searcher = MCTS(iteration_limit=1000)
weak_searcher = MCTS(iteration_limit=1)

# Watch one game with the strong searcher as p1 and the weak one as p2.
# ok, looks good, but we can do better
play_one_game(s=state, p1=strong_searcher, p2=weak_searcher)

In [4]:
# Play 100 games without display and record the reward of each final state.
rewards = []
for n in range(100):
    # every game gets a new state (starting with the 'X' player, i.e., 1)
    # and new searchers
    state = NaughtsAndCrossesState()
    strong_searcher = MCTS(iteration_limit=1000)
    weak_searcher = MCTS(iteration_limit=1)

    # switch the players, just for diversity's sake: the weak searcher is p1
    # this time and the strong one is p2
    final_state = play_one_game(
        s=state, p1=weak_searcher, p2=strong_searcher, show=False
    )
    rewards.append(final_state.get_reward())

In [5]:
# Fraction of the recorded games whose final reward equals -1.
# very good! the strong searcher wins every time, as expected
sum(reward == -1 for reward in rewards) / len(rewards)

1.0

It's a good idea to test the cooperative reward, while we're at it...

In [11]:
# Hand-made boards for checking the cooperative reward, one row per line.
test_boards = [
    # Perfect chequered pattern
    [
        [1, -1, 1],
        [-1, 1, -1],
        [1, -1, 1],
    ],
    # Incorrect pattern (two adjacent 1s)
    [
        [1, -1, 1],
        [-1, 1, 1],
        [1, -1, 1],
    ],
    # Not fully filled
    [
        [1, -1, 1],
        [-1, 0, -1],
        [1, -1, 1],
    ],
    # All rows same
    [
        [1, 1, 1],
        [-1, -1, -1],
        [1, 1, 1],
    ],
    # All columns same
    [
        [1, -1, 1],
        [1, -1, 1],
        [1, -1, 1],
    ],
    # Alternate starting symbol, chequered
    [
        [-1, 1, -1],
        [1, -1, 1],
        [-1, 1, -1],
    ],
]

In [12]:
# One new state per test board, with its board replaced by the test pattern.
instances = [NaughtsAndCrossesState() for _ in test_boards]

for b, pattern in enumerate(test_boards):
    instances[b].board = pattern

# Show every state next to its cooperative reward.
print([(game, game.get_reward_cooperative()) for game in instances])

[(
X | O | X
O | X | O
X | O | X
, 1), (
X | O | X
O | X | X
X | O | X
, 0), (
X | O | X
O |   | O
X | O | X
, 0), (
X | X | X
O | O | O
X | X | X
, 0), (
X | O | X
X | O | X
X | O | X
, 0), (
O | X | O
X | O | X
O | X | O
, 1)]


Looks OK to me. Now for the fun part.

## Cooperation 