<a href="https://colab.research.google.com/github/datapirate09/Tic-Tac-Toe-Game-using-Reinforcement-Learning-Methods/blob/main/value_iteration_forward_sweep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Global value table shared by every function below: maps a board, encoded as a
# tuple of row tuples (see board_to_tuple), to its value from player 0's (X's)
# point of view. Seeded by initialize_states() and refined by value_iterate().
states = {}

def board_to_tuple(board):
    """Return a hashable snapshot of *board*: a tuple holding one tuple per row.

    The result can be used as a dictionary key or set member, unlike the
    list-of-lists representation used while playing.
    """
    return tuple(map(tuple, board))

def getReward(board, player):
    """Score a 3x3 *board* from the point of view of *player*.

    Cells hold 0 or 1 for the two players and -1 when empty. The eight
    possible lines are inspected in a fixed order (rows top to bottom, columns
    left to right, main diagonal, anti-diagonal) and the first completed line
    decides the result.

    Returns:
        1 if the first completed line belongs to *player*, -1 if it belongs to
        anyone else, and 0 when no line is completed.
    """
    # Coordinates of every winning line, in the order they must be checked.
    winning_lines = (
        [((r, 0), (r, 1), (r, 2)) for r in range(3)]
        + [((0, c), (1, c), (2, c)) for c in range(3)]
        + [((0, 0), (1, 1), (2, 2)), ((0, 2), (1, 1), (2, 0))]
    )

    for (r0, c0), (r1, c1), (r2, c2) in winning_lines:
        owner = board[r0][c0]
        # All three cells equal and not the "empty" marker.
        if owner == board[r1][c1] == board[r2][c2] != -1:
            if owner == player:
                return 1
            return -1

    return 0

def isEndOfGame(board):
    """Return True when *board* is finished: a line is completed or no cell is empty.

    A non-zero reward for either player means somebody has three in a row;
    otherwise the game is over only once no cell holds the empty marker -1.
    """
    someone_won = getReward(board, 0) != 0 or getReward(board, 1) != 0
    if someone_won:
        return True

    # No winner: the game continues as long as at least one cell is free.
    for row in board:
        for cell in row:
            if cell == -1:
                return False
    return True

def getNextStates(board, turn):
    """List every board reachable by placing *turn*'s mark on one empty cell.

    Successors are produced in row-major order of the cell that was filled.
    Each successor is an independent copy; *board* itself is never modified.
    """
    free_cells = [
        (r, c) for r in range(3) for c in range(3) if board[r][c] == -1
    ]

    successors = []
    for r, c in free_cells:
        child = [row[:] for row in board]  # copy rows so the parent stays intact
        child[r][c] = turn
        successors.append(child)
    return successors

def getCurrentTurn(board):
    """Work out whose move it is from the marks already on *board*.

    Player 0 moves first, so player 1 is to move exactly when there are more
    0-marks than 1-marks; in every other case it is player 0's turn.
    """
    zeros = ones = 0
    for row in board:
        zeros += row.count(0)
        ones += row.count(1)

    if zeros > ones:
        return 1
    return 0

def initialize_states():
    """Enumerate every board reachable from the empty board and seed ``states``.

    The game tree is explored breadth-first from the empty board. Each board
    is stored in the global ``states`` table under its tuple encoding:

    * a finished board (win or full board) gets its reward from player 0's
      point of view (1, -1 or 0) and is not expanded further;
    * an unfinished board starts at value 0 and its successors are queued.

    Because dictionaries keep insertion order, ``states`` ends up ordered by
    BFS discovery, which is the order later sweeps iterate in.
    """
    # Function-scope import keeps this block self-contained. A deque gives
    # O(1) pops from the front, whereas list.pop(0) shifts the whole queue
    # on every dequeue.
    from collections import deque

    initial_board = [[-1 for _ in range(3)] for _ in range(3)]
    queue = deque([initial_board])
    visited = {board_to_tuple(initial_board)}

    while queue:
        board = queue.popleft()
        board_tuple = board_to_tuple(board)

        if isEndOfGame(board):
            # Terminal boards carry their final outcome and have no successors.
            states[board_tuple] = getReward(board, 0)
            continue

        states[board_tuple] = 0

        turn = getCurrentTurn(board)
        for next_board in getNextStates(board, turn):
            next_tuple = board_to_tuple(next_board)
            # Different move orders reach the same board; queue each one once.
            if next_tuple not in visited:
                visited.add(next_tuple)
                queue.append(next_board)

def value_iterate(discount_factor=0.9, iterations=1000):
    """Sweep the global ``states`` table until the board values stop changing.

    Every non-terminal board is backed up in place from its successors:
    player 0 (X) takes the maximum successor value, player 1 (O) the minimum.
    All values are expressed from player 0's point of view.

    A successor contributes exactly one of two things:

    * if it ends the game, the reward of that final board (no future beyond it);
    * otherwise, ``discount_factor`` times its current table value.

    Terminal boards already hold their reward in ``states``, so adding the
    transition reward on top of the discounted table value would count a win
    or loss twice and make a delayed forced win look better than a terminal
    winning board when the table is read directly.

    Args:
        discount_factor: Weight applied to the value of non-terminal successors.
        iterations: Maximum number of sweeps over the table.

    Progress is printed every 100 sweeps, and a message is printed when the
    largest change within a sweep drops below 0.001, at which point the loop
    stops early.
    """
    for i in range(iterations):
        if i % 100 == 0:
            print(f"Iteration: {i}")

        delta = 0

        # Only values are reassigned during the sweep, never keys, so iterating
        # the dict while updating it is safe.
        for state_tuple in states:
            board = [list(row) for row in state_tuple]

            if isEndOfGame(board):
                # Terminal values are fixed outcomes; nothing to back up.
                continue

            old_value = states[state_tuple]
            turn = getCurrentTurn(board)
            # X maximises the X-perspective value, O minimises it.
            pick = max if turn == 0 else min

            candidate_values = []
            for next_board in getNextStates(board, turn):
                if isEndOfGame(next_board):
                    # Game ends on this move: the outcome is the whole return.
                    value = getReward(next_board, 0)
                else:
                    next_tuple = board_to_tuple(next_board)
                    value = discount_factor * states.get(next_tuple, 0)
                candidate_values.append(value)

            if candidate_values:
                states[state_tuple] = pick(candidate_values)

            delta = max(delta, abs(old_value - states[state_tuple]))

        if delta < 0.001:
            print(f"Converged after {i} iterations")
            break

def print_board(board):
    """Print *board* as a labelled grid, padded by blank lines above and below.

    Player 0 is drawn as 'X', player 1 as 'O' and an empty cell as a space.
    Column indices are printed as a header and each row is prefixed with its
    row index; a separator line follows the first two rows.
    """
    glyphs = {0: 'X', 1: 'O', -1: ' '}
    separator = "  -+-+-"

    print("\n")
    print("  0 1 2")
    for row_index, row in enumerate(board):
        cells = "|".join(glyphs[row[col]] for col in range(3))
        print(f"{row_index} {cells}")
        if row_index in (0, 1):
            print(separator)
    print("\n")

def get_best_move(board, turn):
    """Choose the successor board that is best for *turn* according to ``states``.

    Player 0 picks the successor with the highest table value, player 1 the
    one with the lowest; boards missing from the table count as 0. Ties keep
    the earliest successor in the order produced by getNextStates.

    Returns:
        The chosen successor board (a new list of lists), or None when no
        successor qualifies (for example when the board has no empty cell).
    """
    maximizing = turn == 0
    if maximizing:
        chosen_value = float('-inf')
    else:
        chosen_value = float('inf')
    chosen_board = None

    for candidate in getNextStates(board, turn):
        score = states.get(board_to_tuple(candidate), 0)

        if maximizing:
            improves = score > chosen_value
        else:
            improves = turn == 1 and score < chosen_value

        # Strict comparison above means the first of several equal boards wins.
        if improves:
            chosen_value = score
            chosen_board = candidate

    return chosen_board

def play_game():
    """Build and solve the value table, then play one interactive console game.

    The AI plays X (player 0) and moves first, choosing moves with
    get_best_move. The human plays O (player 1) and enters moves as
    ``row,col`` with indices 0-2. Malformed or illegal entries are rejected
    with a message and the prompt is repeated. The board is printed before
    every move and once more when the game ends, followed by the result.

    Raises:
        EOFError, KeyboardInterrupt: propagated from ``input()`` so that a
            closed stdin or Ctrl-C ends the game instead of being swallowed
            by the retry loop.
    """
    initialize_states()
    value_iterate()

    board = [[-1 for _ in range(3)] for _ in range(3)]
    turn = 0
    while not isEndOfGame(board):
        print_board(board)

        if turn == 0:
            print("AI's turn (X):")
            board = get_best_move(board, turn)
        else:
            print("Your turn (O):")
            while True:
                move = input("Enter row,col (e.g. '1,2'): ")
                try:
                    # ValueError covers both non-integer parts and a wrong
                    # number of comma-separated fields (unpacking failure).
                    i, j = map(int, move.split(','))
                except ValueError:
                    print("Invalid input. Try again with format 'row,col'")
                    continue

                if 0 <= i <= 2 and 0 <= j <= 2 and board[i][j] == -1:
                    board[i][j] = turn
                    break
                print("Invalid move. Try again.")

        if isEndOfGame(board):
            print_board(board)
            reward = getReward(board, 0)
            if reward == 1:
                print("AI wins!")
            elif reward == -1:
                print("You win!")
            else:
                print("It's a draw!")
            break

        turn = 1 - turn

# Running this cell builds the state table, runs value iteration and then
# starts an interactive game that reads the human's moves from stdin.
play_game()

Iteration: 0
Converged after 5 iterations


  0 1 2
0  | | 
  -+-+-
1  | | 
  -+-+-
2  | | 


AI's turn (X):


  0 1 2
0 X| | 
  -+-+-
1  | | 
  -+-+-
2  | | 


Your turn (O):
Enter row,col (e.g. '1,2'): 1,1


  0 1 2
0 X| | 
  -+-+-
1  |O| 
  -+-+-
2  | | 


AI's turn (X):


  0 1 2
0 X|X| 
  -+-+-
1  |O| 
  -+-+-
2  | | 


Your turn (O):
Enter row,col (e.g. '1,2'): 0,2


  0 1 2
0 X|X|O
  -+-+-
1  |O| 
  -+-+-
2  | | 


AI's turn (X):


  0 1 2
0 X|X|O
  -+-+-
1  |O| 
  -+-+-
2 X| | 


Your turn (O):
Enter row,col (e.g. '1,2'): 1,0


  0 1 2
0 X|X|O
  -+-+-
1 O|O| 
  -+-+-
2 X| | 


AI's turn (X):


  0 1 2
0 X|X|O
  -+-+-
1 O|O|X
  -+-+-
2 X| | 


Your turn (O):
Enter row,col (e.g. '1,2'): 2,1


  0 1 2
0 X|X|O
  -+-+-
1 O|O|X
  -+-+-
2 X|O| 


AI's turn (X):


  0 1 2
0 X|X|O
  -+-+-
1 O|O|X
  -+-+-
2 X|O|X


It's a draw!
