# Training the Board Game AI using Deep Q-Network (DQN)

This notebook trains the board game AI with a Deep Q-Network (DQN) implemented in PyTorch. Training runs on a CUDA GPU when one is available (for example on Windows) and falls back to the CPU otherwise, so the notebook also runs on Mac. It includes visualizations of the training results for better transparency and to make progress easier to track across multiple training sessions.

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from board_game_env import BoardGameEnv
from dqn_agent import DQNAgent
from tqdm import tqdm

# Pick the compute device: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Seed the NumPy and PyTorch random number generators with a fixed value
# so that repeated runs start from the same random state.
np.random.seed(42)
torch.manual_seed(42)

## Initialize the environment and agent

In [None]:
# Build the environment first; the agent is sized from the board, with one
# state entry and one action per board cell (board_size x board_size).
env = BoardGameEnv()
state_size = action_size = env.board_size * env.board_size
agent = DQNAgent(state_size, action_size, device)

# Training hyperparameters (consumed by train_dqn_agent below)
num_episodes = 10_000        # total number of episodes to play
max_steps = 100              # upper bound on steps within a single episode
batch_size = 32              # replay starts once memory holds more than this
update_target_every = 100    # episodes between target network updates

## Train the agent

Run the training process and collect the per-episode rewards and lengths; they are visualized in the next section.

In [None]:
def train_dqn_agent(env, agent, num_episodes, max_steps, batch_size, update_target_every):
    """Train the DQN agent by playing episodes against the environment.

    On every step the agent picks an action, the environment applies it, and
    the resulting transition is stored in the agent's memory. Once the memory
    holds more than ``batch_size`` transitions, ``agent.replay()`` is called
    after each step.

    Args:
        env (BoardGameEnv): The game environment.
        agent (DQNAgent): The DQN agent.
        num_episodes (int): Number of episodes to train.
        max_steps (int): Maximum steps per episode. If this is 0 or negative
            no step is taken and the episode is recorded with reward 0 and
            length 0.
        batch_size (int): Replay only starts once the agent's memory holds
            more than this many transitions.
        update_target_every (int): Number of episodes between target network updates.

    Returns:
        list: Episode rewards.
        list: Episode lengths.
    """
    episode_rewards = []
    episode_lengths = []

    for episode in tqdm(range(num_episodes)):
        state = env.reset()
        total_reward = 0
        # Count steps explicitly instead of reading the loop variable after
        # the loop: the loop variable is never bound when max_steps <= 0,
        # which used to raise UnboundLocalError on the first episode.
        steps_taken = 0
        for _ in range(max_steps):
            action = agent.act(state)
            next_state, reward, done, _info = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward
            steps_taken += 1

            # NOTE(review): batch_size is only used as a threshold here and is
            # not passed to replay(); presumably the agent keeps its own batch
            # size - confirm against DQNAgent.
            if len(agent.memory) > batch_size:
                agent.replay()

            if done:
                break

        episode_rewards.append(total_reward)
        episode_lengths.append(steps_taken)

        # Episode 0 also satisfies this condition, so the target network is
        # updated right after the very first episode.
        if episode % update_target_every == 0:
            agent.update_target_model()

    return episode_rewards, episode_lengths

# Run the full training loop; keep the per-episode rewards and lengths
# so they can be plotted afterwards.
rewards, lengths = train_dqn_agent(
    env=env,
    agent=agent,
    num_episodes=num_episodes,
    max_steps=max_steps,
    batch_size=batch_size,
    update_target_every=update_target_every,
)

## Plot training results

In [None]:
def plot_training_results(rewards, lengths):
    """Show per-episode rewards and lengths as two stacked line plots.

    Args:
        rewards (list): Episode rewards, drawn in the upper panel.
        lengths (list): Episode lengths, drawn in the lower panel.
    """
    # One entry per panel, top to bottom: (data series, title, y-axis label).
    panels = (
        (rewards, 'Episode Rewards', 'Reward'),
        (lengths, 'Episode Lengths', 'Length'),
    )

    _, axes = plt.subplots(2, 1, figsize=(12, 10))
    for axis, (series, title, y_label) in zip(axes, panels):
        axis.plot(series)
        axis.set_title(title)
        axis.set_xlabel('Episode')
        axis.set_ylabel(y_label)

    plt.tight_layout()
    plt.show()

# Visualize the statistics collected during training.
plot_training_results(rewards=rewards, lengths=lengths)

## Save the trained model

In [None]:
# Persist the trained agent via its own save() method, then confirm on stdout.
model_path = 'dqn_model.pth'
agent.save(model_path)
print("Model saved successfully.")

## Play a test game

In [None]:
def play_test_game(env, agent):
    """Play one game with the agent, rendering the board before every move.

    The environment is reset, then the agent acts until the environment
    reports the game as done. The final board is rendered once more and the
    accumulated reward is printed.

    Args:
        env (BoardGameEnv): The game environment.
        agent (DQNAgent): The trained DQN agent.
    """
    observation = env.reset()
    total_reward = 0

    # At least one move is always played; the loop ends on the first step
    # for which the environment reports done.
    while True:
        env.render()
        move = agent.act(observation)
        observation, reward, finished, _ = env.step(move)
        total_reward += reward
        if finished:
            break

    env.render()
    print(f"Game over. Total reward: {total_reward}")

# Let the trained agent play a single rendered game.
play_test_game(env=env, agent=agent)