## Load the dataset

In [None]:
## Load the dataset
import pickle

# Read the pickled dataset bundle from disk.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# open .pkl files that come from a trusted source.
with open('connect4_data.pkl', 'rb') as pkl_file:
    data = pickle.load(pkl_file)

# Fail fast when an expected split is absent, reporting the first missing key.
required_keys = ['X_train', 'Y_train', 'X_val', 'Y_val', 'X_test', 'Y_test']
first_missing = next((name for name in required_keys if name not in data), None)
if first_missing is not None:
    raise KeyError(f"Missing required key in dataset: {first_missing}")


## Extract Arrays

In [None]:
# Bind each split stored in the loaded dict to its own variable.
X_train, Y_train = data['X_train'], data['Y_train']
X_val, Y_val = data['X_val'], data['Y_val']
X_test, Y_test = data['X_test'], data['Y_test']

# Report the shape of every split as a quick sanity check.
print(
    "Data successfully loaded from connect4_data.pkl.",
    f"Training data shape: {X_train.shape}, {Y_train.shape}",
    f"Validation data shape: {X_val.shape}, {Y_val.shape}",
    f"Test data shape: {X_test.shape}, {Y_test.shape}",
    sep="\n",
)


## Data Preprocessing

In [None]:
from tensorflow.keras.utils import to_categorical

# Dividing by 1.0 applies no scaling; it only promotes the inputs to floating
# point. Replace the divisor with the data's maximum value if real
# normalization is needed.
X_train = X_train / 1.0
X_val = X_val / 1.0
X_test = X_test / 1.0

num_classes = 7  # Assuming 7 possible moves


def _to_one_hot(labels):
    """Return `labels` one-hot encoded over `num_classes` classes.

    This cell rebinds Y_train / Y_val / Y_test to their encoded form, so
    running it a second time would otherwise pass already-encoded labels back
    into to_categorical and add an extra trailing axis. Labels that are
    already 2-D with `num_classes` columns are therefore returned unchanged,
    which keeps the cell safe to re-run.

    Args:
        labels: Array of class indices, or an already one-hot encoded array.
            Assumed to expose a NumPy-style `.shape` -- TODO confirm.

    Returns:
        The one-hot encoded labels.
    """
    shape = getattr(labels, 'shape', None)
    if shape is not None and len(shape) == 2 and shape[-1] == num_classes:
        return labels
    return to_categorical(labels, num_classes=num_classes)


# Convert labels to one-hot encoding
Y_train = _to_one_hot(Y_train)
Y_val = _to_one_hot(Y_val)
Y_test = _to_one_hot(Y_test)

## CNN

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Small convolutional classifier over the board tensor.
# Conv2D needs per-sample inputs of shape (rows, cols, channels); X_train is
# assumed to be laid out as (samples, rows, cols, channels) -- TODO confirm.
#
# padding='same' keeps the spatial size through each convolution. With the
# default 'valid' padding, a 6x7 board (the standard Connect 4 grid -- confirm
# against the shapes printed when the data was loaded) shrinks to 4x5, then to
# 2x2 after pooling, which is smaller than the second 3x3 kernel and makes
# model construction fail.
cnn_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=X_train.shape[1:]),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One output unit per class, normalized into a probability distribution.
    Dense(num_classes, activation='softmax')
])

# categorical_crossentropy expects one-hot encoded labels.
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=20, batch_size=32)

## Transformer

In [None]:
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.models import Model
import tensorflow as tf

def transformer_block(inputs, num_heads, key_dim, ff_dim):
    """Apply one Transformer encoder block to `inputs`.

    Runs multi-head self-attention followed by a two-layer feed-forward
    network. Each sub-layer is wrapped in a residual connection, and layer
    normalization is applied after the residual sum.

    Args:
        inputs: Keras tensor to transform; its last dimension sets the output
            width of the feed-forward network.
        num_heads: Number of attention heads.
        key_dim: Per-head size passed to MultiHeadAttention.
        ff_dim: Hidden width of the feed-forward network.

    Returns:
        The Keras tensor produced by the final layer normalization.
    """
    layers = tf.keras.layers
    feature_dim = inputs.shape[-1]

    # Self-attention: the same tensor serves as both query and value.
    attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)
    attended = attention(inputs, inputs)
    attention_norm = layers.LayerNormalization(epsilon=1e-6)
    normed_attention = attention_norm(inputs + attended)

    # Feed-forward network that projects back to the input width so the
    # residual sum below has matching shapes.
    feed_forward = tf.keras.Sequential([
        layers.Dense(ff_dim, activation='relu'),
        layers.Dense(feature_dim),
    ])
    fed_forward = feed_forward(normed_attention)
    output_norm = layers.LayerNormalization(epsilon=1e-6)

    return output_norm(normed_attention + fed_forward)

# MultiHeadAttention attends across a sequence axis, so the input is reshaped
# into a sequence of tokens instead of being flattened into one feature vector
# (a flattened (batch, features) tensor has no sequence axis to attend over).
# The -1 lets Keras infer the sequence length; each token keeps the input's
# last axis as its features.
input_layer = Input(shape=X_train.shape[1:])
tokens = tf.keras.layers.Reshape((-1, X_train.shape[-1]))(input_layer)

# Project every token to a wider embedding before the block. The block applies
# LayerNormalization over the feature axis, which would erase all information
# if that axis held only a single value per token.
embed_dim = 32
embedded = Dense(embed_dim)(tokens)

transformer_output = transformer_block(embedded, num_heads=4, key_dim=8, ff_dim=32)

# Flatten the per-token outputs so the classifier emits one distribution per
# sample, matching the (samples, num_classes) one-hot labels.
flattened = Flatten()(transformer_output)
output_layer = Dense(num_classes, activation='softmax')(flattened)

transformer_model = Model(inputs=input_layer, outputs=output_layer)
transformer_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the Transformer with the same settings used for the CNN: 20 epochs,
# batches of 32, with the validation split supplied for monitoring.
transformer_model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=20,
    batch_size=32,
)

## Test

In [None]:
## Test
# Score both trained models on the held-out test split.
# Both models were compiled with metrics=['accuracy'], and the prints below
# read the accuracy from index [1] of each evaluate() result. Despite their
# names, these variables hold the full evaluate() result, not a bare accuracy.
cnn_accuracy = cnn_model.evaluate(X_test, Y_test)
transformer_accuracy = transformer_model.evaluate(X_test, Y_test)
print(f"CNN Accuracy: {cnn_accuracy[1]}")
print(f"Transformer Accuracy: {transformer_accuracy[1]}")