# Tic-tac-toe minmax algorithm with search limits
We will demonstrate the minmax algorithm on a game played on a 3x3 game board.

In [5]:
import numpy as np
import copy
import time

The State class captures the current state of the game.
* **Attributes**
    * gameplan - 3x3 game board with values 0 to 2
    * player - the player who is currently on the turn
    * current_player - the player will analyze the game and keep track of possible new states. Players take turns, so the new state should be viewed from the perspective of the opposing player. the player who is on the turn in the current state when searching the state space
    * depth - the depth of the analyzed state
    * max_depth - how many moves ahead the maximum is looking. If depth = max_depth, I don't analyze the game any further.

* **Methods**
    * terminal_test - method returns information whether the current state is final or not. If it is, it returns the winner.
    * utility - the method tries to evaluate the current state from the player's perspective. In the basic version, it only distinguishes whether the player wins, doesn't win or the move doesn't lead to the end of the game.
    * possible_actions - the method returns a list of possible moves. In the case of biscuits, this will be the coordinates of the playing area where the playing stone can be placed.
    * expand - this method takes the current state and action definition (the coordinates where to place the die) and creates a new game state.
    * minmax - custom implementation of the minmax algorithm
    * next_current_player - returns the opponent to the current_player variable
    * next_player - returns the opponents to the player variable    

In [29]:
class State:
    """ Capturing the state of the game
    gameplan - two-dimensional 3x3 array (0: empty, 1: X, 2: O)
    player - the player who has the turn in the game
    current_player - player who is on the turn in the given state when searching the state space
    depth - depth of the state space search
    max_depth - maximum length of the search
    """

    generated = 0

    def __init__(self, gameplan, player, current_player=None, depth=0, max_depth=3):
        self.gameplan = gameplan
        self.player = player
        if current_player is None:
            self.current_player = player
        else:
            self.current_player = current_player
        self.depth = depth
        self.max_depth = max_depth
        
        State.generated += 1

    def terminal_test(self):
        """ The method tests the current state and returns a value indicating whether the game is finished and, if so, who the winner is
            0 - no final status (game continues)
            1 - Player 1 wins
            2 - Player 2 wins
            -1 - Draw
        """
        # Horizontal and Vertical Check
        for i in range(3):
            if np.array_equal(self.gameplan[i], [1, 1, 1]) or np.array_equal(self.gameplan[:, i], [1, 1, 1]):
                return 1
            if np.array_equal(self.gameplan[i], [2, 2, 2]) or np.array_equal(self.gameplan[:, i], [2, 2, 2]):
                return 2

        # Diagonal Check
        if np.array_equal(self.gameplan.diagonal(), [1, 1, 1]) or np.array_equal(np.fliplr(self.gameplan).diagonal(), [1, 1, 1]):
            return 1
        if np.array_equal(self.gameplan.diagonal(), [2, 2, 2]) or np.array_equal(np.fliplr(self.gameplan).diagonal(), [2, 2, 2]):
            return 2
        
        # Draw Check: If no winner and no empty spaces
        if 0 not in self.gameplan:
            return -1 

        return 0 # Game continues

    def utility(self, result):
        """ The method returns an evaluation of the current state of the game
            (from the perspective of the original player self.player)
        """
        if result == -1: # Draw
            return 0
        elif result == self.player: # Original player wins
            return 1
        else: # Opponent wins
            return -1

    def possible_actions(self):
        """ The method returns a list of possible actions (coordinates of empty fields). """
        possible_actions = []
        for i in range(3):
            for j in range(3):
                if self.gameplan[i][j] == 0:
                    possible_actions.append((i, j))
        return possible_actions

    def expand(self, select_action):
        """ Creates a new game state by applying the action. """
        if select_action[0] not in range(3) or select_action[1] not in range(3) or self.gameplan[select_action[0], select_action[1]] != 0:
            return None
        
        new_array = np.copy(self.gameplan)
        new_array[select_action[0], select_action[1]] = self.current_player 
        
        return State(new_array, 
                     self.player, 
                     self.next_current_player(), 
                     self.depth + 1, 
                     max_depth=self.max_depth)
        
    def minmax(self, strategy="max"):
        """ The depth-limited Minimax algorithm. """
        
        # 1. Check for Terminal State (Win/Loss/Draw)
        result = self.terminal_test()
        actions = self.possible_actions()
        
        if result != 0: 
            # Game ended, return utility score and a dummy action
            return self.utility(result), actions[0] if actions else None
        
        # 2. **APPLY MAXIMUM DEPTH RESTRICTION**
        if self.depth >= self.max_depth:
            # Max depth reached, stop search and return heuristic evaluation (0 for draw/undecided)
            return 0, actions[0] if actions else None

        # 3. Initialization
        if strategy == "max":
            selected_utilization_value = float('-inf')
            next_strategy = "min"
        else: 
            selected_utilization_value = float('inf')
            next_strategy = "max"

        if not actions:
            return 0, None # Safety check for draw
            
        selected_action = actions[0]

        # 4. Search Loop
        for action in actions:
            expanded_state = self.expand(action)
            
            # Recursive Call
            utilization, _ = expanded_state.minmax(next_strategy)

            # 5. Update based on strategy
            if strategy == "max":
                if utilization > selected_utilization_value:
                    selected_utilization_value = utilization
                    selected_action = action
            else: # strategy == "min"
                if utilization < selected_utilization_value:
                    selected_utilization_value = utilization
                    selected_action = action

        return selected_utilization_value, selected_action

    def next_current_player(self):
        """ Returns the opponent for the state space searching. """
        return 3 - self.current_player

    def next_player(self):
        """ Returns the opponent for the actual game turn. """
        return 3 - self.player


#test

def run_game(max_depth):
    """ Runs a game with the specified max_depth and returns metrics. """
    State.generated = 0
    start_time = time.time()
    
    # Initialize the state (Game plan is empty, P1 starts)
    state = State(gameplan=np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]]),
                  player=1, max_depth=max_depth)
    
    total_generated = 0
    game_result = 0 # 0: Continue, 1: P1 Win, 2: P2 Win, -1: Draw

    while True:
        game_result = state.terminal_test()
        if game_result != 0:
            break

        # Current player's move (always uses "max" strategy)
        State.generated = 0
        _, player_action = state.minmax("max") 
        total_generated += State.generated
        
        if player_action is None:
             game_result = -1 
             break

        state = state.expand(player_action)
        
        # Switching the game turn to the other player
        state.player = state.next_player()

    end_time = time.time()
    duration = end_time - start_time
    
    if game_result == -1:
        winner = "Drawn"
    elif game_result == 1 or game_result == 2:
        winner = f"Player {game_result} Wins"
    else:
        winner = "Unknown"
        
    return winner, duration, total_generated

# --- Run Tests ---
depths_to_test = [2, 4, 9] 
results = {}

print("ðŸš€ Tic-Tac-Toe Minimax Search Depth Tests")
print("="*50)

for depth in depths_to_test:
    winner, duration, generated_states = run_game(depth)
    results[depth] = {
        "winner": winner,
        "time": duration,
        "generated_states": generated_states
    }

# --- Results Comparison ---
print("\n" + "="*50)
print("ðŸ“Š Results Comparison")
print("="*50)

print("| Constraint (D) | Game Result | Total Time (s) | Generated States |")
print("|---------------|-------------|-----------------|------------------|")
for depth, data in results.items():
    print(f"| D={depth:<10}| {data['winner']:<11} | {data['time']:.4f} s | {data['generated_states']:<16}|")
print("="*50)

print("\n**Observation Summary:** As the depth limit decreases, time and the number of generated states drop rapidly, but the game outcome may deviate from the optimal (draw) result.")

ðŸš€ Tic-Tac-Toe Minimax Search Depth Tests

ðŸ“Š Results Comparison
| Constraint (D) | Game Result | Total Time (s) | Generated States |
|---------------|-------------|-----------------|------------------|
| D=2         | Player 1 Wins | 0.0083 s | 89              |
| D=4         | Player 1 Wins | 0.1765 s | 4064            |
| D=9         | Drawn       | 22.9571 s | 618175          |

**Observation Summary:** As the depth limit decreases, time and the number of generated states drop rapidly, but the game outcome may deviate from the optimal (draw) result.


# Task
- Add a constraint to the algorithm to limit the maximum search depth.
- Try different search depth constraints.
- Observe how the times and numbers of generated states change
- Are the game results changing?

You need to implement a limitation on the # !!! todo

In [30]:
# Creating the initial state of the game
    # Game plan is empty
    # the turn is player 1
state = State(gameplan=np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]]),
              player=1, max_depth=2)
# Cycle for a game where two copies of the algorithm play against each other
while True:
    # Check if the game is not over
    game_result = state.terminal_test()
    if game_result != 0:
        print(f"Winner is {game_result} ")
        break

    # Checking for drawn
    if len(state.possible_actions()) == 0:
        print("Drawn")
        break

    # player move
    print(f"=====================\nPlayer {state.player}")
    _, player_action = state.minmax("max")
    print(f"Select action: {player_action}")
    state = state.expand(player_action)
    print(state.gameplan)
    print(f"Generated states {State.generated}.")
    State.generated = 0

    # switching the game to the other player
    state.player = state.next_player()

Player 1
Select action: (0, 0)
[[1 0 0]
 [0 0 0]
 [0 0 0]]
Generated states 85.
Player 2
Select action: (0, 1)
[[1 2 0]
 [0 0 0]
 [0 0 0]]
Generated states 9.
Player 1
Select action: (0, 2)
[[1 2 1]
 [0 0 0]
 [0 0 0]]
Generated states 1.
Player 2
Select action: (1, 0)
[[1 2 1]
 [2 0 0]
 [0 0 0]]
Generated states 1.
Player 1
Select action: (1, 1)
[[1 2 1]
 [2 1 0]
 [0 0 0]]
Generated states 1.
Player 2
Select action: (1, 2)
[[1 2 1]
 [2 1 2]
 [0 0 0]]
Generated states 1.
Player 1
Select action: (2, 0)
[[1 2 1]
 [2 1 2]
 [1 0 0]]
Generated states 1.
Winner is 1 
