<a href="https://colab.research.google.com/github/Vizarb/tic_tac_toe_ai/blob/main/tic_tac_toe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# tensorflow only works on python 3.11 and below adust the version if needed

In [1]:
# import sys
# !{sys.executable} -m pip install tensorflow



# **i think the current problem are: either quality data generation and or making the AI try to take better positions (ie corners and center) for the success chance to reach above 35%**

In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import random

# For reproducibility
np.random.seed(42)
random.seed(42)

def validate_data(X, y):
    """
    Validates the input and output data to ensure correct shapes and values.

    Parameters:
    - X (np.ndarray): The input board states.
    - y (np.ndarray): The output labels (one-hot encoded move).

    Returns:
    - bool: True if validation is successful, False otherwise.
    """
    # Check that X has the correct shape
    if X.shape[-3:] != (3, 3, 1):
        print(f"Invalid input shape. Expected (3, 3, 1), but got {X.shape[-3:]}.")
        return False

    # Check that y has the correct shape and one-hot encoding
    if y.shape[-1] != 9:
        print(f"Invalid output shape. Expected 9 possible positions, but got {y.shape[-1]}.")
        return False

    if not np.all(np.isin(X, [-1, 0, 1])):  # Check that the input board contains only valid values (-1, 0, 1)
        print("Input data contains invalid values. Only -1, 0, and 1 are allowed.")
        return False

    return True

""" made it worse
def augment_board(board):

    # Generates rotated and flipped versions of a Tic-Tac-Toe board.

    # Parameters:
    # - board (np.ndarray): The original Tic-Tac-Toe board.

    # Returns:
    # - list: A list of augmented boards.

    augmented_boards = [board]

    # 90-degree rotation
    augmented_boards.append(np.rot90(board))

    # 180-degree rotation (two 90-degree rotations)
    augmented_boards.append(np.rot90(np.rot90(board)))

    # 270-degree rotation (three 90-degree rotations)
    augmented_boards.append(np.rot90(np.rot90(np.rot90(board))))

    # Horizontal flip
    augmented_boards.append(np.flip(board, axis=1))

    # Vertical flip
    augmented_boards.append(np.flip(board, axis=0))

    return augmented_boards
"""

def generate_tictactoe_data(num_samples):
    """
    Generates random Tic-Tac-Toe game states and corresponding moves.

    Parameters:
    - num_samples (int): The number of samples (board states) to generate.

    Returns:
    - X (np.ndarray): A numpy array of shape (num_samples, 3, 3, 1) representing the board states.
    - y (np.ndarray): A numpy array of shape (num_samples, 9) representing the one-hot encoded move (position on the board).
    """
    X = []
    y = []

    for _ in range(num_samples):
        # Start with an empty 3x3 board
        board = np.zeros((3, 3))  # Empty board (0: empty, 1: player 1, -1: player 2)
        current_player = 1  # Player 1 starts the game

        # Simulate a random sequence of moves, alternating between players
        moves = []
        for _ in range(random.randint(1, 9)):  # Random number of moves from 1 to 9
            valid_positions = [(r, c) for r in range(3) for c in range(3) if board[r, c] == 0]
            if not valid_positions:
                break  # No more valid moves (board is full)

            # Choose a random empty spot
            chosen_position = random.choice(valid_positions)
            chosen_row, chosen_col = chosen_position

            # Apply the move
            board[chosen_row, chosen_col] = current_player
            moves.append((chosen_row, chosen_col))

            # Alternate the player
            current_player = -current_player

        # Select the next valid move (to predict)
        valid_positions = [(r, c) for r in range(3) for c in range(3) if board[r, c] == 0]
        if valid_positions:
            chosen_position = random.choice(valid_positions)
            chosen_row, chosen_col = chosen_position

            # Prepare the input (board) and output (one-hot encoded move)
            X.append(board.reshape(3, 3, 1))  # Reshape to (3, 3, 1) to match model input
            y_one_hot = np.zeros(9)  # 9 possible positions on the board
            y_one_hot[chosen_row * 3 + chosen_col] = 1  # Set the chosen position to 1
            y.append(y_one_hot)

    # Convert the lists to numpy arrays
    X = np.array(X)
    y = np.array(y)

    print(f"Generated data shape: X = {X.shape}, y = {y.shape}")

    # Validate data before returning
    if not validate_data(X, y):
        raise ValueError("Generated data is invalid. Check the data generation logic.")

    return X, y

# Example usage to generate data
X_train, y_train = generate_tictactoe_data(num_samples=20000)  # Generate training data
X_test, y_test = generate_tictactoe_data(num_samples=4000)  # Generate test data

# Check data shapes
print(f"Training data shape: X_train = {X_train.shape}, y_train = {y_train.shape}")
print(f"Test data shape: X_test = {X_test.shape}, y_test = {y_test.shape}")

# Define the model
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=(3, 3, 1), padding='SAME'),
    BatchNormalization(),
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(9, activation='softmax')
])


# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Summarize the model
model.summary()

# Define callbacks
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('./models/tic_tac_toe_model_best.keras', save_best_only=True)
# lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6) dosent help currently

# Train the model with data (X_train, y_train)
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, callbacks=[early_stopping, model_checkpoint])

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

# Make predictions on the test set
predictions = model.predict(X_test)  # X_test contains the board states

# Get the predicted position (index of highest probability)
predicted_position = np.argmax(predictions, axis=1)  # Get the index (position) with the highest probability

# Print out some examples of predicted moves and the actual moves
# for i in range(5):
#     print(f"Predicted move: Position {predicted_position[i]}, Actual move: Position {np.argmax(y_test[i])}")

# Save the final model after training
model.save('./models/tic_tac_toe_model_final.keras')

# To load the model later (for example, for inference or continued training):
# loaded_model = tf.keras.models.load_model('./models/tic_tac_toe_model_final.keras')


Generated data shape: X = (17716, 3, 3, 1), y = (17716, 9)
Generated data shape: X = (3530, 3, 3, 1), y = (3530, 9)
Training data shape: X_train = (17716, 3, 3, 1), y_train = (17716, 9)
Test data shape: X_test = (3530, 3, 3, 1), y_test = (3530, 9)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.1468 - loss: 2.1635 - val_accuracy: 0.2729 - val_loss: 1.9864
Epoch 2/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.2756 - loss: 1.8707 - val_accuracy: 0.3231 - val_loss: 1.7100
Epoch 3/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3227 - loss: 1.7069 - val_accuracy: 0.3307 - val_loss: 1.6122
Epoch 4/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3353 - loss: 1.6186 - val_accuracy: 0.3282 - val_loss: 1.5458
Epoch 5/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3528 - loss: 1.5547 - val_accuracy: 0.3299 - val_loss: 1.5130
Epoch 6/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3561 - loss: 1.5097 - val_accuracy: 0.3338 - val_loss: 1.4733
Epoch 7/20
[1m443/443[0m 