<a href="https://colab.research.google.com/github/datapirate09/Tic-Tac-Toe-Game-using-Policy-Evaluation/blob/main/value_iteration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Memo table shared by dfs() and get_best_move(): maps a board snapshot
# (a tuple of row tuples, as produced by board_to_tuple) to the pair of
# numbers that dfs() stored for that board.
states = {}

def board_to_tuple(board):
    """Return an immutable, hashable snapshot of ``board``.

    Every row is turned into a tuple and the rows are gathered into an outer
    tuple, so the result can serve as a dictionary key.
    """
    return tuple(map(tuple, board))

def getReward(board):
    """Score a 3x3 board from the point of view of player 0.

    A line counts as complete when its three cells are equal and not -1
    (the empty marker).

    Returns:
        1 when the first complete line found belongs to marker 0, -1 when it
        belongs to any other marker, and 0 when no line is complete.
    """
    # Scan order: row 0, column 0, row 1, column 1, row 2, column 2, then the
    # main diagonal and the anti-diagonal. The first complete line decides.
    lines = []
    for k in range(3):
        lines.append(((k, 0), (k, 1), (k, 2)))
        lines.append(((0, k), (1, k), (2, k)))
    lines.append(((0, 0), (1, 1), (2, 2)))
    lines.append(((0, 2), (1, 1), (2, 0)))

    for (r1, c1), (r2, c2), (r3, c3) in lines:
        if board[r1][c1] == board[r2][c2] == board[r3][c3] != -1:
            if board[r1][c1] == 0:
                return 1
            return -1
    return 0

def isEndOfGame(board):
    """Tell whether play has finished on ``board``.

    The game is over when getReward reports a winner (non-zero result) or
    when no cell still holds the empty marker -1.
    """
    if getReward(board) == 0:
        # Nobody has won yet: any remaining empty cell keeps the game alive.
        for row in board:
            if -1 in row:
                return False
    return True

def getNextStates(board, turn):
    """List every board reachable by placing ``turn`` on one empty cell.

    The 3x3 grid is scanned row by row, left to right; each cell equal to -1
    yields one successor. ``board`` itself is left untouched because every
    successor is built from a fresh copy of its rows.
    """
    empty_cells = [
        (r, c)
        for r in range(3)
        for c in range(3)
        if board[r][c] == -1
    ]

    successors = []
    for r, c in empty_cells:
        child = [line[:] for line in board]
        child[r][c] = turn
        successors.append(child)
    return successors

def dfs(board, turn):
    """Fill ``states`` with discounted minimax values for ``board`` and below.

    Player 0 maximises the value and the other player minimises it. A
    finished board is worth its getReward score; an unfinished board is worth
    0.9 times the value of the successor the player to move prefers, so a
    win that is further away is worth less than an immediate one (and a loss
    that is further away hurts less).

    Previously the second tuple element was 0 for every state (terminal
    states stored 0 and parents only ever copied a child's second element),
    which made the ``0.9 * value_fn`` term vanish and left the discount with
    no effect.

    Args:
        board: 3x3 list of row lists holding 0, 1 or -1 (empty).
        turn: 0 when the maximising player moves next; any other value makes
            this level minimise. Recursive calls pass ``1 - turn``.

    Returns:
        The pair stored in ``states`` for this board:
        ``(value, best_successor_value)``. For finished boards this is
        ``(getReward(board), 0)``.
    """
    discount = 0.9

    # The memo is keyed on the board only; ``turn`` is not part of the key.
    board_key = board_to_tuple(board)
    if board_key in states:
        return states[board_key]

    if isEndOfGame(board):
        states[board_key] = (getReward(board), 0)
        return states[board_key]

    maximizing = turn == 0
    best_successor = float('-inf') if maximizing else float('inf')

    for next_board in getNextStates(board, turn):
        # Only the successor's own value matters here; its second element is
        # bookkeeping for that successor.
        successor_value, _ = dfs(next_board, 1 - turn)
        if maximizing:
            best_successor = max(best_successor, successor_value)
        else:
            best_successor = min(best_successor, successor_value)

    # Unfinished boards carry no immediate reward, so the value is just the
    # discounted value of the preferred successor.
    states[board_key] = (discount * best_successor, best_successor)
    return states[board_key]

def print_board(board):
    """Print ``board`` to stdout, one text row per board row.

    Cells are drawn as 'X' (0), 'O' (1) or a blank (-1), separated by '|',
    and every row is followed by a five-dash rule.
    """
    glyph_for = {-1: ' ', 0: 'X', 1: 'O'}
    rule = '-' * 5
    for cells in board:
        rendered = '|'.join(map(glyph_for.__getitem__, cells))
        print(rendered, rule, sep='\n')

def get_best_move(board, turn):
    """Pick the successor of ``board`` that is best for the player ``turn``.

    Values are read from ``states`` (first element of each stored pair);
    boards that are not in ``states`` are treated as value 0. Player 0 wants
    the highest value. Any other player wants the lowest one, because wins
    for that player are scored negatively. The previous version maximised
    for both players, which chose the worst move when ``turn`` was 1.

    Args:
        board: 3x3 list of row lists holding 0, 1 or -1 (empty).
        turn: marker of the player to move.

    Returns:
        The chosen successor board (a new list of lists), or ``None`` when
        ``board`` has no empty cell. Ties keep the first candidate in scan
        order.
    """
    maximizing = turn == 0
    best_val = float('-inf') if maximizing else float('inf')
    best_move = None
    for next_board in getNextStates(board, turn):
        val, _ = states.get(board_to_tuple(next_board), (0, 0))
        improved = val > best_val if maximizing else val < best_val
        if improved:
            best_val = val
            best_move = next_board
    return best_move

def _read_human_move(board):
    """Prompt until the user names an empty in-range cell; return (row, col)."""
    while True:
        try:
            i = int(input("Enter row (0-2): "))
            j = int(input("Enter col (0-2): "))
        except ValueError:
            # Only non-numeric text is retried. EOFError and KeyboardInterrupt
            # propagate so a closed stdin or Ctrl-C ends the game instead of
            # looping on the prompt forever.
            print("Invalid input. Try again.")
            continue

        # Explicit range check: negative numbers would otherwise be accepted
        # through Python's wrap-around indexing.
        if not (0 <= i <= 2 and 0 <= j <= 2):
            print("Invalid input. Try again.")
            continue

        if board[i][j] == -1:
            return i, j
        print("Cell already taken.")


def play_game():
    """Play one interactive game: the algorithm is X (0), the user is O (1).

    The state table is filled by running dfs from the empty board, then the
    two sides alternate until isEndOfGame reports the game is over. The
    board is printed before every move and once more at the end, followed by
    the outcome.

    Raises:
        EOFError: if standard input is exhausted while a move is requested.
    """
    initial_board = [[-1 for _ in range(3)] for _ in range(3)]
    dfs(initial_board, 0)

    board = initial_board
    turn = 0

    while not isEndOfGame(board):
        print("\n")
        print("Current board:")
        print_board(board)

        if turn == 0:
            print("Algorithm's turn (X):")
            board = get_best_move(board, 0)
        else:
            print("Your turn (O):")
            i, j = _read_human_move(board)
            board[i][j] = 1

        turn = 1 - turn

    print("\n")
    print("Final board:")
    print_board(board)
    result = getReward(board)
    if result == 1:
        print("Algorithm (X) wins!")
    elif result == -1:
        print("You (O) win!")
    else:
        print("It's a draw!")

# Start one interactive game as soon as this cell/script is executed.
play_game()



Current board:
 | | 
-----
 | | 
-----
 | | 
-----
Algorithm's turn (X):


Current board:
X| | 
-----
 | | 
-----
 | | 
-----
Your turn (O):
Enter row (0-2): 0
Enter col (0-2): 2


Current board:
X| |O
-----
 | | 
-----
 | | 
-----
Algorithm's turn (X):


Current board:
X| |O
-----
X| | 
-----
 | | 
-----
Your turn (O):
Enter row (0-2): 2
Enter col (0-2): 0


Current board:
X| |O
-----
X| | 
-----
O| | 
-----
Algorithm's turn (X):


Current board:
X| |O
-----
X|X| 
-----
O| | 
-----
Your turn (O):
Enter row (0-2): 2
Enter col (0-2): 2


Current board:
X| |O
-----
X|X| 
-----
O| |O
-----
Algorithm's turn (X):


Final board:
X| |O
-----
X|X|X
-----
O| |O
-----
Algorithm (X) wins!
