In [1]:
import sys
import traceback
import torch
import torch.nn as nn
import torch.optim as optim
import random

def print_exception():
    exc_type, exc_value, exc_traceback = sys.exc_info()
    traceback.print_exception(exc_type, exc_value, exc_traceback)

# Define the game environment
class TicTacToeEnv:
    def __init__(self):
        self.board = [[0, 0, 0] for _ in range(3)]
        self.player = 1

    def reset(self):
        self.board = [[0, 0, 0] for _ in range(3)]
        self.player = 1
        return [0 for _ in range(9)]
        
    def step(self, action):
        x, y = action
        if self.board[x][y] == 0:
            self.board[x][y] = self.player
            self.player = -self.player

            # Check if the game is over
            if self.check_game_over():
                return self.board, self.get_reward(), True
            else:
                return self.board, 0, False
        else:
            return self.board, -1, True

    def check_game_over(self):
        # Check rows
        for row in self.board:
            if row[0] == row[1] == row[2] and row[0] != 0:
                return True

        # Check columns
        for col in range(3):
            if self.board[0][col] == self.board[1][col] == self.board[2][col] and self.board[0][col] != 0:
                return True

        # Check diagonals
        if self.board[0][0] == self.board[1][1] == self.board[2][2] and self.board[0][0] != 0:
            return True
        if self.board[0][2] == self.board[1][1] == self.board[2][0] and self.board[0][2] != 0:
            return True

        # Check if the board is full
        for row in self.board:
            for cell in row:
                if cell == 0:
                    return False

        # If none of the above conditions are met, the game is a draw
        return True

    def get_reward(self):
        # Check if player 1 won
        if self.check_win(1):
            return 1
        # Check if player -1 won
        elif self.check_win(-1):
            return -1
        # Otherwise, the game is a draw
        else:
            return 0

    def check_win(self, player):
        # Check rows
        for row in self.board:
            if row[0] == row[1] == row[2] == player:
                return True

        # Check columns
        for col in range(3):
            if self.board[0][col] == self.board[1][col] == self.board[2][col] == player:
                return True

        # Check diagonals
        if self.board[0][0] == self.board[1][1] == self.board[2][2] == player:
            return True
        if self.board[0][2] == self.board[1][1] == self.board[2][0] == player:
            return True

        # If none of the above conditions are met, the player has not won
        return False

# Define the AI agent
class TicTacToeAgent(nn.Module):
    def __init__(self):
        super(TicTacToeAgent, self).__init__()
        self.fc1 = nn.Linear(9, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 9)

    def forward(self, x):
        x = torch.FloatTensor(x)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def act(self, state):
        # Convert the state to a tensor
        state = torch.FloatTensor(state)

        # Use the forward pass to get the action scores
        action_scores = self.forward(state)

        # Choose the action with the highest score
        _, action = torch.max(action_scores, dim=0)

        # Convert the action to a tuple of coordinates
        return (action // 3, action % 3)

# Play a game of tic-tac-toe between two agents
def play_game(agent_1, agent_2):
    env = TicTacToeEnv()
    state = env.reset()
    player = 1

    while True:
        # Agent 1 plays
        if player == 1:
            action = agent_1.act(state)
            state, reward, done = env.step(action)
            if done:
                return reward

        # Agent 2 plays
        else:
            action = agent_2.act(state)
            state, reward, done = env.step(action)
            if done:
                return reward

        player = -player

# Train an agent using deep reinforcement learning
def train(agent, num_episodes, discount_factor=0.99):
    optimizer = optim.Adam(agent.parameters())

    for i in range(num_episodes):
        # Play a game against a random agent
        reward = play_game(agent, RandomAgent())
        # Compute the loss
        loss = -torch.log(agent.forward(state)[action]) * reward
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Print the loss every 1000 episodes
        if i % 1000 == 0:
            print(f'Episode {i}: loss = {loss.item():.4f}')

# Define the random agent
class RandomAgent:
    def act(self, state):
        return random.choice([(i, j) for i in range(3) for j in range(3) if state[3 * i + j] == 0])

# Main function
def main():
    # Create the agent
    agent = TicTacToeAgent()

    # Train the agent
    train(agent, 10000)

if __name__ == '__main__':
    main()


IndexError: ignored