In [None]:
import math
import random
from typing import List, Optional, Tuple

In [None]:
class TwoPlayerGameEnv:
    """Interface shared by the two-player game environments in this file.

    Every method here raises ``NotImplementedError``; concrete games override
    them.  The environments defined in this file represent a state as a
    ``(position, current_player)`` pair in which ``current_player`` is ``1``
    for Player 1 and ``-1`` for Player 2.
    """

    def initial_state(self):
        """Return the state from which a new game starts."""
        raise NotImplementedError

    def moves(self, state) -> List:
        """Return possible moves for the current player in given state."""
        raise NotImplementedError

    def is_terminal(self, state) -> Tuple[bool, Optional[int]]:
        """Check if the state is terminal.

        Returns:
            A pair ``(is_leaf, value)``.  ``value`` is ``+1`` if Player 1
            wins, ``-1`` if Player 2 wins, ``0`` for a draw, and ``None``
            when the state is not terminal.  Environments whose leaves carry
            explicit payoffs return the stored leaf value instead.
        """
        raise NotImplementedError

    def make_move(self, state, move):
        """Apply a move and return the new state."""
        raise NotImplementedError

    def render(self, state):
        """Print the state in a human-readable form."""
        raise NotImplementedError



In [None]:
class NimEnv(TwoPlayerGameEnv):
    """Single-pile Nim in which the player who takes the last stone loses.

    A state is ``(stones_left, current_player)`` where ``current_player`` is
    ``1`` for Player 1 and ``-1`` for Player 2.  On each turn the player to
    move removes between 1 and ``max_take`` stones.
    """

    def __init__(self, total_stones: int, max_take: int):
        """Store the starting pile size and the per-turn removal limit."""
        self.total_stones = total_stones
        self.max_take = max_take

    def initial_state(self):
        """Return the full pile with Player 1 to move."""
        # state = (stones_left, current_player)
        # current_player: 1 for Player 1, -1 for Player 2
        return (self.total_stones, 1)

    def moves(self, state) -> List[int]:
        """Return the stone counts the player to move may remove."""
        stones, _ = state
        # Never offer to remove more stones than are left in the pile.
        return list(range(1, min(self.max_take, stones) + 1))

    def is_terminal(self, state) -> Tuple[bool, Optional[int]]:
        """Return ``(is_leaf, value)`` under the last-stone-loses rule.

        ``value`` is the winner (``1`` or ``-1``) for a terminal state and
        ``None`` otherwise.
        """
        stones, player = state
        if stones == 0:
            # The previous player emptied the pile, i.e. took the last stone,
            # and therefore lost: the player now to move is the winner.
            return True, player
        if stones == 1:
            # The player to move is forced to take the last stone and loses,
            # so the game is decided without playing that move out.
            return True, -player
        return False, None

    def make_move(self, state, move):
        """Remove ``move`` stones and hand the turn to the other player."""
        stones, player = state
        return (stones - move, -player)

    def render(self, state):
        """Print the pile as a row of stones plus the player to move."""
        stones, player = state
        print("Stones:", "● " * stones)
        print(f"Player to move: {'Player 1' if player == 1 else 'Player 2'}")

    def render_tree(self, state, depth=0, alpha_beta=False, alpha=-math.inf, beta=math.inf):
        """Recursively render the game tree in ASCII.

        ``alpha_beta``, ``alpha`` and ``beta`` are forwarded unchanged to
        every recursive call; this method does not otherwise use them, so the
        whole tree below ``state`` is always printed.
        """
        is_leaf, value = self.is_terminal(state)
        prefix = "  " * depth
        if is_leaf:
            print(f"{prefix}State={state}, Value={value}")
            return

        print(f"{prefix}State={state} (Player {state[1]})")
        for move in self.moves(state):
            next_state = self.make_move(state, move)
            print(f"{prefix} Move {move} →")
            self.render_tree(next_state, depth + 1, alpha_beta, alpha, beta)


In [None]:

class TicTacToeEnv(TwoPlayerGameEnv):
    """Tic-tac-toe on a 3x3 grid.

    A state is ``(board, current_player)``: ``board`` is a list of three
    row lists holding ``'X'``, ``'O'`` or ``' '``, and ``current_player`` is
    ``1`` (plays ``'X'``) or ``-1`` (plays ``'O'``).
    """

    def initial_state(self):
        """Return an empty board with Player 1 to move."""
        empty_board = [[' '] * 3 for _ in range(3)]
        return (empty_board, 1)

    def moves(self, state):
        """Return the ``(row, col)`` coordinates of all empty cells, row by row."""
        grid, _ = state
        # Walk the nine cells in row-major order.
        cells = (divmod(k, 3) for k in range(9))
        return [(r, c) for r, c in cells if grid[r][c] == ' ']

    def is_terminal(self, state):
        """Return ``(is_leaf, value)``.

        ``value`` is ``1`` when ``'X'`` holds a full line, ``-1`` when ``'O'``
        does, ``0`` when the board is full without a winner, and ``None``
        while the game is still open.
        """
        grid, _ = state
        span = range(3)

        # Every winning triple: the three rows, three columns, two diagonals.
        columns = [[grid[r][c] for r in span] for c in span]
        diagonals = [
            [grid[k][k] for k in span],
            [grid[k][2 - k] for k in span],
        ]
        triples = grid + columns + diagonals

        # 'X' is checked before 'O'.
        for mark, outcome in (('X', 1), ('O', -1)):
            if [mark] * 3 in triples:
                return True, outcome

        has_empty_cell = any(cell == ' ' for row in grid for cell in row)
        if not has_empty_cell:
            return True, 0
        return False, None

    def make_move(self, state, move):
        """Return the state after the current player marks cell ``move``.

        The board rows are copied first, so the board inside ``state`` is
        left unmodified.
        """
        grid, mover = state
        r, c = move
        updated = []
        for row in grid:
            updated.append(row[:])
        updated[r][c] = 'X' if mover == 1 else 'O'
        return (updated, -mover)

    def render(self, state):
        """Print the board row by row, then the player to move."""
        grid, mover = state
        for row in grid:
            print('|'.join(row))
            print('-' * 5)
        label = "Player 1 (X)" if mover == 1 else "Player 2 (O)"
        print("Next player:", label)

    def render_tree(self, state, depth=0, max_depth=None):
        """Print the game tree below ``state``, indenting one space per level.

        When ``max_depth`` is given, nodes at that depth are printed with a
        trailing ``...`` and their children are not expanded.
        """
        terminal, outcome = self.is_terminal(state)
        indent = " " * depth
        if terminal:
            print(f"{indent}Leaf Value={outcome}")
            self.render(state)
            return

        cut_off = max_depth is not None and depth >= max_depth
        if cut_off:
            print(f"{indent}Node (Player {state[1]}) ...")
        else:
            print(f"{indent}Node (Player {state[1]})")
        self.render(state)
        if cut_off:
            return

        for move in self.moves(state):
            print(f"{indent} Move {move} →")
            child = self.make_move(state, move)
            self.render_tree(child, depth + 1, max_depth)

In [None]:
from math import log2


class NormalFormGameEnv(TwoPlayerGameEnv):
    """Complete binary game tree with an explicit value at every leaf.

    A state is ``(path, current_player)``: ``path`` is the string of ``0``/``1``
    moves played so far and ``current_player`` is ``1`` or ``-1``.  A path of
    full length, read as a binary number, indexes ``leaf_values``.
    """

    def __init__(self, leaf_values=None, value_range=(-1, 1), n_leaves=16):
        """Build the tree.

        Args:
            leaf_values: Values of the leaves in left-to-right order.  When
                ``None``, ``n_leaves`` random integers are drawn.
            value_range: Inclusive ``(low, high)`` bounds for the random
                integers; only used when ``leaf_values`` is ``None``.
            n_leaves: Number of leaves; must be a power of two.

        Raises:
            ValueError: If ``n_leaves`` is not a positive power of two, or if
                ``leaf_values`` does not contain exactly ``n_leaves`` items.
        """
        # 2 moves at each depth, depth=4 => 16 leaves
        # Any other leaf count has no integer depth, and paths could then
        # never reach a leaf.
        if n_leaves < 1 or n_leaves & (n_leaves - 1):
            raise ValueError(
                f"n_leaves must be a positive power of two, got {n_leaves}"
            )
        if leaf_values is None:
            low, high = value_range[0], value_range[1]
            leaf_values = [random.randint(low, high) for _ in range(n_leaves)]
        elif len(leaf_values) != n_leaves:
            raise ValueError(
                f"expected {n_leaves} leaf values, got {len(leaf_values)}"
            )
        self.leaf_values = leaf_values
        # Exact integer depth of the tree: log2 of a power of two.
        self.path_len = int(n_leaves).bit_length() - 1

    @staticmethod
    def _leaf_index(path):
        """Return the leaf index encoded by a full-length binary ``path``."""
        # int("", 2) raises ValueError, so the empty path of a one-leaf tree
        # is mapped to its only leaf explicitly.
        return int(path, 2) if path else 0

    def initial_state(self):
        """Return the empty path with Player 1 to move."""
        # state = (path_so_far, current_player)
        return ("", 1)

    def moves(self, state):
        """Return ``[0, 1]`` below leaf depth and ``[]`` at a leaf."""
        path, _ = state
        if len(path) >= self.path_len:
            return []
        return [0, 1]  # binary choice

    def is_terminal(self, state):
        """Return ``(True, leaf_value)`` at a leaf, else ``(False, None)``."""
        path, _ = state
        if len(path) == self.path_len:
            return True, self.leaf_values[self._leaf_index(path)]
        return False, None

    def make_move(self, state, move):
        """Append ``move`` to the path and switch the player to move."""
        path, player = state
        return (path + str(move), -player)

    def render(self, state):
        """Print the path, its depth and the player; at a leaf also its value."""
        path, player = state
        print(f"Path: {path} (depth={len(path)}), Player {player}")
        if len(path) == self.path_len:
            print(f"Leaf value: {self.leaf_values[self._leaf_index(path)]}")

    def render_tree(self, state, depth=0, max_depth=None):
        """Print the tree below ``state``, indenting one space per level.

        When ``max_depth`` is given, nodes at that depth are printed with a
        trailing ``...`` and their children are not expanded.
        """
        is_leaf, value = self.is_terminal(state)
        prefix = " " * depth
        if is_leaf:
            print(f"{prefix}Leaf path={state[0]}, Value={value}")
            self.render(state)
            return
        if max_depth is not None and depth >= max_depth:
            print(f"{prefix}Node path={state[0]} (Player {state[1]}) ...")
            self.render(state)
            return
        print(f"{prefix}Node path={state[0]} (Player {state[1]})")
        self.render(state)
        for move in self.moves(state):
            print(f"{prefix} Move {move} →")
            self.render_tree(self.make_move(state, move), depth + 1, max_depth)
