In [None]:
from ttt_env import TicTacToeEnv
from agents import RandomAgent
import train

def render_board(board):
    """Print the current board, followed by a legend of the cell indices.

    ``board`` is iterated as a flat sequence of nine cell values, where
    ``1`` is drawn as "O", ``-1`` as "X" and ``0`` as an empty cell.
    """
    glyph_for = {1: "O", -1: "X", 0: " "}
    marks = [glyph_for[value] for value in board]

    def grid(cells):
        # Three " a | b | c" rows joined by the horizontal rule, wrapped in
        # the leading/trailing newlines that pad the printed grid.
        rows = [
            f" {cells[start]} | {cells[start + 1]} | {cells[start + 2]}"
            for start in (0, 3, 6)
        ]
        return "\n" + "\n---+---+---\n".join(rows) + "\n"

    print(grid(marks))
    print("Indices:")
    # The legend is the same grid with each cell showing its own index.
    print(grid(range(9)))

def ask_human_move(env):
    """Prompt on stdin until the user enters one of the legal moves.

    Args:
        env: Object exposing ``available_actions()``, which returns the
            collection of currently legal move indices.

    Returns:
        The chosen move as an ``int`` that is a member of the legal moves.

    Raises:
        ValueError: If there are no legal moves. No input could ever be
            accepted in that case, so the prompt would otherwise loop
            forever.
    """
    actions = env.available_actions()
    if len(actions) == 0:
        raise ValueError("No available actions to choose from.")

    while True:
        raw = input(f"Choose move {actions}: ")
        # Only the conversion can raise ValueError; keep the try minimal so
        # unrelated errors are not mistaken for bad user input.
        try:
            a = int(raw)
        except ValueError:
            print("Please enter an integer.")
            continue
        if a in actions:
            return a
        print("Invalid move. Try again.")

def play_human_vs_human():
    """Run one interactive game in which both sides are entered by hand.

    The board and the side to move are printed before every prompt. Once
    the environment reports that the game is over, the final board and the
    result are printed.
    """
    game = TicTacToeEnv()
    game.reset()

    print("Human vs Human. O starts (player=1). X is player=-1.")
    while True:
        render_board(game.board)
        mover = "O" if game.current_player == 1 else "X"
        print("Current player:", mover)
        # Only the "done" flag of the step result is needed here.
        _, _, finished = game.step(ask_human_move(game))
        if finished:
            break

    render_board(game.board)
    result = game.check_winner()
    for player, announcement in ((1, "O wins!"), (-1, "X wins!")):
        if result == player:
            print(announcement)
            break
    else:
        # Neither player id matched, so the game ended without a winner.
        print("Draw!")

def play_human_vs_random(human_is_O=True):
    """Play one interactive game between a human and a ``RandomAgent``.

    The board is printed before every move. On the human's turn the move is
    read from stdin; otherwise the random agent picks among the available
    actions and its choice is printed.

    Args:
        human_is_O: When true the human plays O (player ``1``); otherwise
            the human plays X (player ``-1``).
    """
    env = TicTacToeEnv()
    rnd = RandomAgent()
    env.reset()

    human_player = 1 if human_is_O else -1
    print(f"Human vs Random. Human is {'O' if human_player==1 else 'X'}.")

    done = False
    while not done:
        render_board(env.board)

        if env.current_player == human_player:
            action = ask_human_move(env)
        else:
            action = rnd.act(env.board, env.available_actions())
            print("Random plays:", action)

        _, _, done = env.step(action)

    render_board(env.board)
    winner = env.check_winner()
    if winner == human_player:
        print("Human wins!")
    elif winner == -human_player:
        print("Random wins!")
    else:
        # Treat any result that is not a player id as a draw, matching
        # play_human_vs_human. This stays correct whether the environment
        # reports a draw as None or as 0.
        print("Draw!")

def play_human_vs_policy(agent, human_is_O=True):
    """Play one interactive game between a human and a policy agent.

    The board is printed before every move. On the human's turn the move is
    read from stdin; otherwise ``agent.act`` is called with
    ``training=False`` and its choice is printed.

    Args:
        agent: Object exposing ``act(board, available_actions, training=...)``
            that returns the move to play.
        human_is_O: When true the human plays O (player ``1``); otherwise
            the human plays X (player ``-1``).
    """
    env = TicTacToeEnv()
    env.reset()

    human_player = 1 if human_is_O else -1
    print(f"Human vs Policy. Human is {'O' if human_player==1 else 'X'}.")

    done = False
    while not done:
        render_board(env.board)

        if env.current_player == human_player:
            action = ask_human_move(env)
        else:
            action = agent.act(env.board, env.available_actions(), training=False)
            print("Policy plays:", action)

        _, _, done = env.step(action)

    render_board(env.board)
    winner = env.check_winner()
    if winner == human_player:
        print("Human wins!")
    elif winner == -human_player:
        print("Policy wins!")
    else:
        # Treat any result that is not a player id as a draw, matching
        # play_human_vs_human. This stays correct whether the environment
        # reports a draw as None or as 0.
        print("Draw!")


In [None]:
# Start an interactive two-player game; each move is read from keyboard input.
play_human_vs_human()


In [None]:
# Play interactively against the random agent, with the human playing O.
play_human_vs_random(human_is_O=True)


In [None]:
# Train an agent with train.train, then play against it with the human as O.
# NOTE(review): 50000 is presumably the number of training episodes —
# confirm against train.train.
agent = train.train(50000)
play_human_vs_policy(agent, human_is_O=True)
