<a href="https://colab.research.google.com/github/datapirate09/Tic-Tac-Toe-Game-using-Policy-Evaluation/blob/main/policy_eval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import random
import copy

actions = [[i, j] for i in range(3) for j in range(3)]
states = {}

def isEndOfGame(board):
    for i in range(3):
        if board[i][0] == board[i][1] == board[i][2] != -1: return True
        if board[0][i] == board[1][i] == board[2][i] != -1: return True
    if board[0][0] == board[1][1] == board[2][2] != -1: return True
    if board[0][2] == board[1][1] == board[2][0] != -1: return True
    if all(cell != -1 for row in board for cell in row): return True
    return False

def check_win(board):
    for i in range(3):
        if board[i][0] == board[i][1] == board[i][2] == 0: return True
        if board[0][i] == board[1][i] == board[2][i] == 0: return True
    if board[0][0] == board[1][1] == board[2][2] == 0: return True
    if board[0][2] == board[1][1] == board[2][0] == 0: return True
    return False

def check_loss(board):
    for i in range(3):
        if board[i][0] == board[i][1] == board[i][2] == 1: return True
        if board[0][i] == board[1][i] == board[2][i] == 1: return True
    if board[0][0] == board[1][1] == board[2][2] == 1: return True
    if board[0][2] == board[1][1] == board[2][0] == 1: return True
    return False

def get_reward(board):
    if isEndOfGame(board):
        if check_win(board): return 10
        elif check_loss(board): return -10
        else: return -1
    return -0.1

def get_all_next_states(state, player):
    next_states = []
    for a in actions:
        x, y = a
        if state[x][y] == -1:
            next_state = copy.deepcopy(state)
            next_state[x][y] = player
            next_states.append((tuple(map(tuple, next_state)), a))
    return next_states

def policy_evaluation(iterations=5000, gamma=0.9):
    for i in range(iterations):
        new_states = {}
        for state_tuple in list(states.keys()):
            state = [list(row) for row in state_tuple]
            if isEndOfGame(state):
                continue

            turn = sum(cell != -1 for row in state for cell in row) % 2
            possible_next_states = get_all_next_states(state, turn)

            v = 0
            for next_state, _ in possible_next_states:
                next_state_board = [list(row) for row in next_state]
                reward = get_reward(next_state_board)
                v += (1 / len(possible_next_states)) * (reward + gamma * states.get(next_state, 0))

            new_states[state_tuple] = v

        for k, v in new_states.items():
            states[k] = v

        if (i + 1) % 500 == 0:
            print(f"Completed Iteration {i + 1}")

def initialize_states():
    def dfs(state, turn):
        state_tuple = tuple(map(tuple, state))
        if state_tuple in states:
            return
        states[state_tuple] = 0

        if isEndOfGame(state):
            return

        for a in actions:
            x, y = a
            if state[x][y] == -1:
                next_state = copy.deepcopy(state)
                next_state[x][y] = turn
                dfs(next_state, 1 - turn)

    empty = [[-1 for _ in range(3)] for _ in range(3)]
    dfs(empty, 0)

initialize_states()
policy_evaluation()

Completed Iteration 500
Completed Iteration 1000
Completed Iteration 1500
Completed Iteration 2000
Completed Iteration 2500
Completed Iteration 3000
Completed Iteration 3500
Completed Iteration 4000
Completed Iteration 4500
Completed Iteration 5000


In [23]:
import random

def player_move(states, current_state, actions, turn):
    best_value = float('-inf')
    best_moves = []

    for action in actions:
        x_val, y_val = action
        if current_state[x_val][y_val] == -1:
            next_state = [row[:] for row in current_state]
            next_state[x_val][y_val] = turn

            next_state_tuple = tuple(map(tuple, next_state))
            state_key = (next_state_tuple, 1 - turn)
            state_value = states.get(state_key, 0)

            if state_value > best_value:
                best_value = state_value
                best_moves = [action]
            elif state_value == best_value:
                best_moves.append(action)

    if best_moves:
        return random.choice(best_moves)
    else:
        return None

def print_board(board):
    symbols = {0: 'X', 1: 'O', -1: ' '}
    for row in board:
        print(' | '.join(symbols[cell] for cell in row))
        print('-' * 5)

def isEndOfGame(board):
    for i in range(3):
        if board[i][0] == board[i][1] == board[i][2] != -1: return True
        if board[0][i] == board[1][i] == board[2][i] != -1: return True
    if board[0][0] == board[1][1] == board[2][2] != -1: return True
    if board[0][2] == board[1][1] == board[2][0] != -1: return True
    if all(cell != -1 for row in board for cell in row): return True
    return False

def check_win(board):
    for i in range(3):
        if board[i][0] == board[i][1] == board[i][2] == 1: return True
        if board[0][i] == board[1][i] == board[2][i] == 1: return True
    if board[0][0] == board[1][1] == board[2][2] == 1: return True
    if board[0][2] == board[1][1] == board[2][0] == 1: return True
    return False

def play_game(states, actions):
    current_state = [[-1, -1, -1], [-1, -1, -1], [-1, -1, -1]]
    remaining_actions = actions.copy()
    turn = 0

    while not isEndOfGame(current_state):
        print_board(current_state)

        if turn == 0:
            print("Computer (X) is making a move...")
            best_move = player_move(states, current_state, remaining_actions, turn)
            if best_move:
                x_val, y_val = best_move
                current_state[x_val][y_val] = 0
                print(f"Computer (X) chose: {best_move}")
            else:
                print("No valid move for Computer!")
        else:
            print("Your turn! (You are O)")
            while True:
                try:
                    x, y = map(int, input("Enter your move (row and column, space-separated): ").split())
                    if current_state[x][y] == -1:
                        current_state[x][y] = 1
                        break
                    else:
                        print("Cell already occupied! Choose another spot.")
                except (ValueError, IndexError):
                    print("Invalid input! Please enter valid row and column (0-2).")

        turn = 1 - turn
        remaining_actions = [action for action in actions if current_state[action[0]][action[1]] == -1]

    print_board(current_state)
    if check_win(current_state):
        print("You (O) win!" if turn == 0 else "Computer (X) wins!")
    else:
        print("It's a draw!")

actions = [[i, j] for i in range(3) for j in range(3)]
print("Starting a new game after training with learned state values...")
print("Total states learned:", len(states))
play_game(states, actions)

Starting a new game after training with learned state values...
Total states learned: 5478
  |   |  
-----
  |   |  
-----
  |   |  
-----
Computer (X) is making a move...
Computer (X) chose: [2, 0]
  |   |  
-----
  |   |  
-----
X |   |  
-----
Your turn! (You are O)
Enter your move (row and column, space-separated): 1 1
  |   |  
-----
  | O |  
-----
X |   |  
-----
Computer (X) is making a move...
Computer (X) chose: [1, 0]
  |   |  
-----
X | O |  
-----
X |   |  
-----
Your turn! (You are O)
Enter your move (row and column, space-separated): 0 0
O |   |  
-----
X | O |  
-----
X |   |  
-----
Computer (X) is making a move...
Computer (X) chose: [2, 1]
O |   |  
-----
X | O |  
-----
X | X |  
-----
Your turn! (You are O)
Enter your move (row and column, space-separated): 2 2
O |   |  
-----
X | O |  
-----
X | X | O
-----
You (O) win!
