# Training the Board Game AI

This notebook trains a Q-learning agent for the board game. It visualizes the win rate during training and saves versioned models, so that progress can be tracked and compared across multiple training sessions.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import os
from tqdm.notebook import tqdm
from board_game_env import BoardGameEnv
from q_learning_agent import QLearningAgent

# Set random seed for reproducibility.
# NOTE(review): this seeds NumPy's global RNG only; if the environment or the
# agent draw from Python's `random` module, that generator is not seeded here
# -- confirm against board_game_env / q_learning_agent.
np.random.seed(42)

# Create a directory for saving models if it doesn't exist
# (exist_ok=True makes re-running this cell harmless).
os.makedirs("models", exist_ok=True)

## Define the training function

This function trains the Q-learning agent, periodically plots the win rate, and saves a versioned model checkpoint at regular intervals.

In [None]:
def train_agent(env, agent, num_episodes=10000, save_interval=1000, plot_interval=1000,
                win_rate_window=100):
    """
    Train the Q-learning agent and visualize the training progress.

    Each episode is played to completion; after every step the agent's
    Q-values are updated. Every ``win_rate_window`` episodes a win-rate sample
    is computed and recorded in the agent's training history.

    Args:
        env (BoardGameEnv): The game environment.
        agent (QLearningAgent): The Q-learning agent to train. It is updated
            in place and also returned.
        num_episodes (int): The number of episodes to train for.
        save_interval (int): Save the model every this many episodes.
        plot_interval (int): Redraw the win-rate plot every this many episodes.
        win_rate_window (int): Number of most recent episodes averaged into
            each win-rate sample, and the spacing (in episodes) between
            samples. Defaults to 100.

    Returns:
        tuple: (trained_agent, rewards, win_rates) where ``rewards`` holds the
        total reward of every episode and ``win_rates`` holds one sample per
        ``win_rate_window`` episodes.

    Raises:
        ValueError: If ``save_interval``, ``plot_interval`` or
            ``win_rate_window`` is not positive.
    """
    # Fail fast: a zero interval would otherwise raise ZeroDivisionError from
    # the modulo checks only after the first episode has already been played.
    for name, value in (
        ("save_interval", save_interval),
        ("plot_interval", plot_interval),
        ("win_rate_window", win_rate_window),
    ):
        if value <= 0:
            raise ValueError(f"{name} must be a positive integer, got {value!r}")

    rewards = []
    win_rates = []

    # Create a tqdm progress bar
    pbar = tqdm(total=num_episodes, desc="Training Progress", position=0, leave=True)
    fig = None

    # try/finally guarantees the progress bar and the figure are released even
    # if the environment or the agent raises in the middle of training.
    try:
        fig, ax = plt.subplots(figsize=(12, 4))

        for episode in range(num_episodes):
            episodes_done = episode + 1
            state = env.reset()
            done = False
            episode_reward = 0

            while not done:
                valid_actions = env.get_valid_actions()
                action = agent.choose_action(state, valid_actions)
                next_state, reward, done, _ = env.step(action)

                agent.update_q_value(state, action, reward, next_state)
                state = next_state
                episode_reward += reward

            rewards.append(episode_reward)

            # Update the progress bar
            pbar.update(1)

            # Record a win-rate sample every `win_rate_window` episodes
            if episodes_done % win_rate_window == 0:
                # Maps the mean episode reward from [-1, 1] onto [0, 1].
                # NOTE(review): this equals the true win rate only if every
                # episode's total reward is +1 (win) or -1 (loss); a total of 0
                # counts as half a win. Confirm against BoardGameEnv's rewards.
                win_rate = sum(rewards[-win_rate_window:]) / (2 * win_rate_window) + 0.5
                win_rates.append(win_rate)
                agent.update_training_history(episodes_done, win_rate)

                # Update progress bar description
                pbar.set_description(f"Training Progress - Win Rate: {win_rate:.2f}")

            # Plot win rate over time at specified intervals
            if episodes_done % plot_interval == 0:
                ax.clear()
                # Sample i (1-based) was taken after i * win_rate_window episodes.
                sample_episodes = range(
                    win_rate_window,
                    len(win_rates) * win_rate_window + 1,
                    win_rate_window,
                )
                ax.plot(sample_episodes, win_rates)
                ax.set_title(f"AI Win Rate Over Time (Version {agent.version})")
                ax.set_xlabel("Episodes")
                ax.set_ylabel("Win Rate")
                plt.draw()
                plt.pause(0.1)

            # Save the model at specified intervals
            if episodes_done % save_interval == 0:
                saved_filename = agent.save_model_with_version("models")
                print(f"\nModel saved as {saved_filename}")
    finally:
        # Close the progress bar and free the figure
        pbar.close()
        if fig is not None:
            plt.close(fig)

    return agent, rewards, win_rates

## Initialize the environment and agent

In [None]:
# Create the game environment and Q-learning agent.
# Both state_size and action_size are set to board_size squared
# (presumably one entry per board cell -- confirm against QLearningAgent).
env = BoardGameEnv()
agent = QLearningAgent(state_size=env.board_size * env.board_size, action_size=env.board_size * env.board_size)

# Load the latest model if available. The return value of load_latest_model is
# used as a success flag: when truthy, training continues from the loaded model.
if agent.load_latest_model("models"):
    print(f"Loaded model version {agent.version}")
else:
    print("No previous model found. Starting from scratch.")

# Set the number of training episodes
num_episodes = 10000  # You can adjust this value

## Train the agent

Run the training process and visualize the results.

In [None]:
# Train the agent and collect results.
# train_agent returns the agent it was given, so `trained_agent` and `agent`
# refer to the same object. `rewards` has one entry per episode; `win_rates`
# has one entry per 100 episodes.
trained_agent, rewards, win_rates = train_agent(env, agent, num_episodes, plot_interval=1000)

## Plot final training results

In [None]:
# Plot the final win rate over time.
# win_rates holds one sample per 100 training episodes, so the x values are
# 100, 200, ..., len(win_rates) * 100.
plt.figure(figsize=(12, 4))
plt.plot(range(100, len(win_rates) * 100 + 1, 100), win_rates)
plt.title(f"AI Win Rate Over Time (Version {agent.version})")
plt.xlabel("Episodes")
plt.ylabel("Win Rate")
plt.show()

## Compare performance across versions

In [None]:
def plot_version_comparison(directory, game_env=None):
    """
    Plot a comparison of win rates across different model versions.

    Every file in ``directory`` named ``model_v*.json`` is loaded into a
    temporary agent, and the last win rate recorded in its training history is
    shown as one bar per model version, ordered by version.

    Args:
        directory (str): The directory containing the saved models.
        game_env (BoardGameEnv, optional): Environment whose ``board_size``
            determines the size of the temporary agents. Defaults to the
            notebook-level ``env``.

    Returns:
        None. If no model (or no model with recorded training history) is
        found, a message is printed and no figure is created.
    """
    # os.listdir returns entries in arbitrary order; sort for determinism.
    model_files = sorted(
        name
        for name in os.listdir(directory)
        if name.startswith("model_v") and name.endswith(".json")
    )
    if not model_files:
        print(f"No saved models found in {directory!r}; nothing to plot.")
        return

    if game_env is None:
        # Fall back to the environment created earlier in the notebook.
        game_env = env
    n_cells = game_env.board_size * game_env.board_size

    results = []
    for filename in model_files:
        temp_agent = QLearningAgent(state_size=n_cells, action_size=n_cells)
        temp_agent.load_model(os.path.join(directory, filename))

        history = temp_agent.training_history
        if not history:
            # A model saved before any win-rate sample was recorded has no
            # final win rate to show; indexing [-1] would raise IndexError.
            print(f"Skipping {filename}: no training history recorded.")
            continue
        results.append((temp_agent.version, history[-1]["win_rate"]))

    if not results:
        print(f"No models with training history found in {directory!r}; nothing to plot.")
        return

    # Order the bars by model version rather than by directory listing order.
    results.sort(key=lambda item: item[0])
    versions = [version for version, _ in results]
    win_rates = [win_rate for _, win_rate in results]

    plt.figure(figsize=(12, 4))
    plt.bar(versions, win_rates)
    plt.title("Win Rate Comparison Across Model Versions")
    plt.xlabel("Model Version")
    plt.ylabel("Final Win Rate")
    plt.ylim(0, 1)
    plt.show()

# Plot version comparison for every model_v*.json file saved in "models"
plot_version_comparison("models")

## Play a test game

Let's play a test game to see how the trained AI performs.

In [None]:
def play_test_game(env, agent):
    """
    Run one full game with the agent choosing every move, rendering the board
    before each move and once more after the game ends, then report the result.

    Args:
        env (BoardGameEnv): The game environment.
        agent (QLearningAgent): The trained Q-learning agent.
    """

    def show_board():
        # Replace the previous cell output with the current board image.
        clear_output(wait=True)
        display(env.get_board_image())

    state = env.reset()
    finished = False

    while not finished:
        show_board()

        legal_moves = env.get_valid_actions()
        action = agent.choose_action(state, legal_moves)
        state, reward, finished, _ = env.step(action)

        # The action is a flat index; split it into (row, column).
        row, col = divmod(action, env.board_size)
        print(f"AI's move: {row}, {col}")

    show_board()

    # The reward from the final step decides the reported outcome.
    if reward == 1:
        outcome = "AI wins!"
    elif reward == -1:
        outcome = "AI loses!"
    else:
        outcome = "It's a draw!"
    print(outcome)

# Play a test game with the agent returned by train_agent
play_test_game(env, trained_agent)