<a href="https://colab.research.google.com/github/datapirate09/Tic-Tac-Toe-Game-using-Reinforcement-Learning-Methods/blob/main/value_iteration_forward_sweep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# Shared value table: maps a board, encoded by board_to_tuple as a tuple of
# row tuples, to its current value estimate from player 0's (X's) perspective.
states = {}

def board_to_tuple(board):
    """Return ``board`` as a tuple of row tuples so it can be used as a dict key."""
    return tuple(map(tuple, board))

def getReward(board, player):
    """Score a 3x3 board from ``player``'s point of view.

    Cells hold 0 or 1 for the two players and -1 when empty. Lines are
    examined in a fixed order (rows, then columns, then the two diagonals)
    and the first completed line decides the result.

    Returns:
        1 if the first completed line belongs to ``player``, -1 if it belongs
        to anyone else, and 0 when no line is completed.
    """
    winning_lines = (
        # rows
        ((0, 0), (0, 1), (0, 2)),
        ((1, 0), (1, 1), (1, 2)),
        ((2, 0), (2, 1), (2, 2)),
        # columns
        ((0, 0), (1, 0), (2, 0)),
        ((0, 1), (1, 1), (2, 1)),
        ((0, 2), (1, 2), (2, 2)),
        # diagonals
        ((0, 0), (1, 1), (2, 2)),
        ((0, 2), (1, 1), (2, 0)),
    )
    for (r0, c0), (r1, c1), (r2, c2) in winning_lines:
        if board[r0][c0] == board[r1][c1] == board[r2][c2] != -1:
            owner = board[r0][c0]
            if owner == player:
                return 1
            return -1
    return 0

def isEndOfGame(board):
    """Return True when the game is over: a line is completed or no cell is empty."""
    # A non-zero reward for either player means some line has been completed.
    if getReward(board, 0) or getReward(board, 1):
        return True

    # Otherwise the game only ends once there is no empty (-1) cell left.
    has_empty_cell = any(cell == -1 for row in board for cell in row)
    return not has_empty_cell

def getNextStates(board, turn):
    """List every board reachable by placing ``turn`` on one empty cell.

    The input board is not modified; each successor is an independent copy.
    Successors are ordered row by row, left to right.
    """
    open_cells = [
        (r, c)
        for r in range(3)
        for c in range(3)
        if board[r][c] == -1
    ]

    successors = []
    for r, c in open_cells:
        child = [row[:] for row in board]
        child[r][c] = turn
        successors.append(child)
    return successors

def getCurrentTurn(board):
    """Return whose move it is: 1 if there are more 0-marks than 1-marks, else 0."""
    marks = [cell for row in board for cell in row]
    zeros_placed = marks.count(0)
    ones_placed = marks.count(1)
    if zeros_placed > ones_placed:
        return 1
    return 0

def initialize_states():
    """Populate the global ``states`` table with every board reachable in play.

    Performs a breadth-first search from the empty board, alternating moves
    as dictated by ``getCurrentTurn``. Terminal boards are stored with their
    reward from player 0's (X's) perspective and are not expanded further;
    every other board starts with a value of 0.
    """
    # Local import: a deque gives O(1) pops from the front, whereas
    # list.pop(0) shifts the whole list on every dequeue.
    from collections import deque

    initial_board = [[-1 for _ in range(3)] for _ in range(3)]
    queue = deque([initial_board])
    visited = {board_to_tuple(initial_board)}

    while queue:
        board = queue.popleft()
        board_tuple = board_to_tuple(board)

        if isEndOfGame(board):
            # Terminal boards keep their final reward and have no successors.
            states[board_tuple] = getReward(board, 0)
            continue

        states[board_tuple] = 0

        turn = getCurrentTurn(board)
        for next_board in getNextStates(board, turn):
            next_tuple = board_to_tuple(next_board)
            # Mark on enqueue so a board reached via several move orders is
            # queued only once.
            if next_tuple not in visited:
                visited.add(next_tuple)
                queue.append(next_board)

def value_iterate(iterations=1000, discount=0.9, tolerance=0.001):
    """Run in-place value-iteration sweeps over the global ``states`` table.

    For every non-terminal board the value becomes the best one-step backup
    ``getReward(next, 0) + discount * states[next]`` over its successors:
    the maximum when player 0 (X) is to move, the minimum when player 1 (O)
    is to move. Terminal boards are left untouched. Updates are written back
    immediately, so later boards in the same sweep see the new values.

    Args:
        iterations: Maximum number of sweeps to perform.
        discount: Factor applied to the successor's stored value.
        tolerance: Stop once the largest change in a sweep is below this.

    Prints a progress line every 100 sweeps and a message on convergence.
    """
    for i in range(iterations):
        if i % 100 == 0:
            print("Iteration:", i)

        delta = 0
        for state_tuple in states:
            board = [list(row) for row in state_tuple]
            if isEndOfGame(board):
                continue

            old_value = states[state_tuple]
            turn = getCurrentTurn(board)

            # Values are always from X's perspective, so X maximises and O
            # minimises the same backed-up quantity.
            backups = [
                getReward(next_board, 0)
                + discount * states.get(board_to_tuple(next_board), 0)
                for next_board in getNextStates(board, turn)
            ]
            if backups:
                choose = max if turn == 0 else min
                states[state_tuple] = choose(backups)

            delta = max(delta, abs(old_value - states[state_tuple]))

        if delta < tolerance:
            print(f"Converged after {i} iterations")
            break

def print_board(board):
    """Print ``board`` to stdout: a blank line, then each row followed by a rule.

    Cells are drawn as 'X' for 0, 'O' for 1 and a space for -1.
    """
    glyph_for = {-1: ' ', 0: 'X', 1: 'O'}
    rule = '-----'

    print()
    for row in board:
        rendered = '|'.join(map(glyph_for.__getitem__, row))
        print(rendered)
        print(rule)

def get_best_move(board, turn, discount=0.9):
    """Return the successor board that ``turn`` should move to, or None.

    Successors are ranked by the same one-step lookahead that value_iterate
    backs up: ``getReward(next, 0) + discount * states[next]``. Ranking by
    ``states[next]`` alone is inconsistent with those backups: a terminal
    winning board is stored as 1, while a non-terminal board that merely
    leads to a win can be stored with a larger value (for example
    0.9 * 1.9 = 1.71), so an immediate win could be passed over.

    Args:
        board: Current 3x3 board (list of lists; -1 marks an empty cell).
        turn: 0 to pick the maximising move for X, anything else to pick the
            minimising move for O.
        discount: Factor applied to the stored successor value; it should
            equal the discount value_iterate applies (0.9).

    Returns:
        The chosen successor board, or None when ``board`` has no empty cell.
        Ties keep the first successor in ``getNextStates`` order.
    """
    maximizing = turn == 0
    best_val = float('-inf') if maximizing else float('inf')
    best_move = None

    for next_board in getNextStates(board, turn):
        stored = states.get(board_to_tuple(next_board), 0.0)
        val = getReward(next_board, 0) + discount * stored
        # Strict comparison so the earliest of several equal moves is kept.
        improved = val > best_val if maximizing else val < best_val
        if improved:
            best_val = val
            best_move = next_board

    return best_move

def play_game():
    """Build the value table, then play an interactive game on stdin/stdout.

    The AI plays X (player 0) and moves first; the human plays O (player 1)
    and enters a row and a column in the range 0-2. The loop ends when
    ``isEndOfGame`` reports a finished board, after which the result is
    printed.
    """
    initialize_states()
    value_iterate()
    board = [[-1 for _ in range(3)] for _ in range(3)]
    turn = 0

    print("\nTic Tac Toe Game")
    print("You are O, the AI is X")

    while not isEndOfGame(board):
        print("\nCurrent board:")
        print_board(board)

        if turn == 0:
            print("AI's turn (X):")
            board = get_best_move(board, 0)
        else:
            print("Your turn (O):")
            while True:
                # Only a non-integer entry is treated as a retry. A bare
                # ``except`` here would also swallow KeyboardInterrupt and
                # EOFError, making the prompt impossible to interrupt and
                # spinning forever once stdin is closed.
                try:
                    i = int(input("Enter row (0-2): "))
                    j = int(input("Enter col (0-2): "))
                except ValueError:
                    print("Invalid input. Try again.")
                    continue

                if 0 <= i <= 2 and 0 <= j <= 2 and board[i][j] == -1:
                    board[i][j] = 1
                    break
                print("Invalid move. Try again.")
        turn = 1 - turn

    print("\nFinal board:")
    print_board(board)
    result = getReward(board, 0)
    if result == 1:
        print("AI (X) wins!")
    elif result == -1:
        print("You (O) win!")
    else:
        print("It's a draw!")

# Entry point: builds the value table and starts an interactive game as soon
# as this cell/module is executed.
play_game()

Iteration: 0
Converged after 5 iterations

Tic Tac Toe Game
You are O, the AI is X

Current board:

 | | 
-----
 | | 
-----
 | | 
-----
AI's turn (X):

Current board:

X| | 
-----
 | | 
-----
 | | 
-----
Your turn (O):
Enter row (0-2): 1
Enter col (0-2): 1

Current board:

X| | 
-----
 |O| 
-----
 | | 
-----
AI's turn (X):

Current board:

X|X| 
-----
 |O| 
-----
 | | 
-----
Your turn (O):
Enter row (0-2): 0
Enter col (0-2): 2

Current board:

X|X|O
-----
 |O| 
-----
 | | 
-----
AI's turn (X):

Current board:

X|X|O
-----
 |O| 
-----
X| | 
-----
Your turn (O):
Enter row (0-2): 1
Enter col (0-2): 0

Current board:

X|X|O
-----
O|O| 
-----
X| | 
-----
AI's turn (X):

Current board:

X|X|O
-----
O|O|X
-----
X| | 
-----
Your turn (O):
Enter row (0-2): 2
Enter col (0-2): 1

Current board:

X|X|O
-----
O|O|X
-----
X|O| 
-----
AI's turn (X):

Final board:

X|X|O
-----
O|O|X
-----
X|O|X
-----
It's a draw!
