In [10]:
# !pip install numpy
import numpy as np 
import random 

In [11]:
#display the tic tac toe board
def display_board(board):
    print("\n".join([" |".join(board[i*3:(i+1)*3]) for i in range(3)]))
    print("-" * 9)

In [12]:
# Check if there's a winner
def check_winner(board, player):
    wins = [(0,1,2), (3,4,5), (6,7,8), (0,3,6), (1,4,7), (2,5,8), (0,4,8), (2,4,6)]
    return any(all(board[i] == player for i in win) for win in wins)

In [13]:
# Q-Learning agent functions
q_table = {} # Stores Q-values for state-action pairs
def get_state(board):
    return "".join(board)

In [14]:
def choose_action(board, epsilon=0.1):
    state = get_state(board)
    if random.random() < epsilon or state not in q_table:
        return random. choice([i for i, x in enumerate(board) if x == ' '])
    return max((i for i, x in enumerate(board) if x == ' '), key=lambda x: q_table[state].get(x, 0))

In [15]:
def update_q(state, action, reward, next_state, alpha=0.5, gamma=0.9):
    if state not in q_table:
        q_table[state] = {}
    old_q = q_table[state].get(action, 0)
    next_max = max(q_table.get(next_state, {}).values(), default=0)
    q_table[state] [action] = old_q + alpha * (reward + gamma * next_max - old_q)

In [16]:
# Play one episode of Tic Tac Toe with Q-learning
# board = [' '] * 9
# while True:
# 	state = get_state(board)
# 	action = choose_action(board)
# 	board[action] = 'X'
# 	if check_winner(board, 'X'):
# 		update_q(state, action, 1, get_state(board))
# 		break
# 	if ' ' not in board:
# 		update_q(state, action, 0.5, get_state(board))
# 		break
# 	opp_action = random.choice([i for i, x in enumerate(board) if x == ' '])
# 	board[opp_action] = '0'
# 	if check_winner(board, '0'):
# 		update_q(state, action, -1, get_state(board))
# 		break
# 	update_q(state, action, 0, get_state(board))

# Training the AI through reinforcement Learning
def train_ai(episodes=5000):
	for _ in range(episodes):
		board = [' '] * 9
		while True:
			state = get_state(board)
			action = choose_action(board)
			board[action] = 'X'
			if check_winner(board, 'X'):
				update_q(state, action, 1, get_state(board))
				break
			if ' ' not in board:
				update_q(state, action, 0.5, get_state(board))
				break
			opp_action = random.choice([i for i, x in enumerate(board) if x == ' '])
			board[opp_action] = '0'
			if check_winner(board, '0'):
				update_q(state, action, -1, get_state(board))
				break
			update_q(state, action, 0, get_state(board)
)

In [17]:
# Play a game against the trained AI
def play_game():
	board = [' '] * 9
	while True:
		display_board(board)
		ai_action = choose_action(board, epsilon=0)  # No exploration in test
		board[ai_action] = 'X'
		print("\nAI moved:")
		display_board(board)
		
		if check_winner(board, 'X'):
			print("AI wins!")
			break
		if ' ' not in board:
			print("It's a draw!")
			break
		
		while True:
			try:
				player_action = int(input("\nEnter your move (0-8): "))
				if board[player_action] == ' ':
					board[player_action] = '0'
					break
				print("Invalid move, try again.")
			except ValueError:
				print("Invalid input, try again.")
		
		if check_winner(board, '0'):
			display_board(board)
			print("You win!")
			break
		if ' ' not in board:
			display_board(board)
			print("It's a draw!")
			break

In [19]:
#main
print("Training AI...")
train_ai()
print("Training completed. Let's play!")
print("game starts! you are 0 and ai is X")
print("positions ((0-8))")
print(np.arange(9).reshape(3,3))

Training AI...
Training completed. Let's play!
game starts! you are 0 and ai is X
positions ((0-8))
[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [20]:
play_game()

  |  | 
  |  | 
  |  | 
---------

AI moved:
X |  | 
  |  | 
  |  | 
---------
X |0 | 
  |  | 
  |  | 
---------

AI moved:
X |0 | 
X |  | 
  |  | 
---------
X |0 | 
X |  | 
0 |  | 
---------

AI moved:
X |0 | 
X |X | 
0 |  | 
---------
X |0 | 
X |X | 
0 |0 | 
---------

AI moved:
X |0 | 
X |X |X
0 |0 | 
---------
AI wins!
