In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models

## Load the dataset

In [6]:
# Load Data
# Assuming the data is in a list of dictionaries as described
file_path = 'mallika.pkl'
data = pd.read_pickle(file_path)

## Data Preprocessing

In [7]:
# Preprocess Data
def preprocess_data(data):
    boards = []
    labels = []
    for item in data:
        board = np.array(item['board'])
        label = item['recommended_column']
        boards.append(board)
        labels.append(label)
    
    boards = np.array(boards)
    labels = np.array(labels)
    
    # One-hot encode labels (7 possible moves)
    labels = tf.keras.utils.to_categorical(labels, num_classes=7)
    return boards, labels

boards, labels = preprocess_data(data)

# Split into train and validation sets
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(boards, labels, test_size=0.2, random_state=42)

## CNN

In [9]:
def build_cnn():
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(6, 7, 2), padding="same"),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu', padding="same"),  # Use "same" padding
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(7, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

cnn_model = build_cnn()


# Train CNN
cnn_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=32)


Epoch 1/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.2184 - loss: 1.8685 - val_accuracy: 0.3233 - val_loss: 1.6709
Epoch 2/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3349 - loss: 1.6518 - val_accuracy: 0.3689 - val_loss: 1.5739
Epoch 3/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3671 - loss: 1.5638 - val_accuracy: 0.3983 - val_loss: 1.5025
Epoch 4/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.4016 - loss: 1.4972 - val_accuracy: 0.4030 - val_loss: 1.4766
Epoch 5/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.4130 - loss: 1.4537 - val_accuracy: 0.4162 - val_loss: 1.4217
Epoch 6/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.4364 - loss: 1.4163 - val_accuracy: 0.4273 - val_loss: 1.4241
Epoch 7/20
[1m787/787[0m 

<keras.src.callbacks.history.History at 0x322e39130>

## Transformer

In [11]:
def build_transformer():
    input_layer = layers.Input(shape=(6, 7, 2))
    reshaped = layers.Reshape((42, 2))(input_layer)

    # Multi-Head Self-Attention
    attn_output = layers.MultiHeadAttention(num_heads=4, key_dim=32)(reshaped, reshaped)
    attn_output = layers.LayerNormalization()(attn_output + reshaped)

    # Feed-Forward Network
    ffn = layers.Dense(128, activation='relu')(attn_output)
    ffn = layers.Dense(2)(ffn)  # Match the shape of the input (42, 2)
    ffn_output = layers.LayerNormalization()(ffn + attn_output)

    # Classification Head
    flattened = layers.Flatten()(ffn_output)
    dense = layers.Dense(128, activation='relu')(flattened)
    output_layer = layers.Dense(7, activation='softmax')(dense)

    model = models.Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

transformer_model = build_transformer()


# Train Transformer
transformer_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=32)

Epoch 1/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.2551 - loss: 1.8353 - val_accuracy: 0.3230 - val_loss: 1.6892
Epoch 2/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.3437 - loss: 1.6488 - val_accuracy: 0.3273 - val_loss: 1.6598
Epoch 3/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.3655 - loss: 1.6032 - val_accuracy: 0.3446 - val_loss: 1.6401
Epoch 4/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.3770 - loss: 1.5719 - val_accuracy: 0.3543 - val_loss: 1.6230
Epoch 5/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.3923 - loss: 1.5502 - val_accuracy: 0.3626 - val_loss: 1.6185
Epoch 6/20
[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.4004 - loss: 1.5278 - val_accuracy: 0.3551 - val_loss: 1.6247
Epoch 7/20
[1m787/787[0m 

<keras.src.callbacks.history.History at 0x326f3b3e0>

## Test

In [12]:
# Evaluate Models
def evaluate_models(model, X_val, y_val):
    loss, accuracy = model.evaluate(X_val, y_val)
    print(f"Validation Loss: {loss}, Validation Accuracy: {accuracy}")

evaluate_models(cnn_model, X_val, y_val)
evaluate_models(transformer_model, X_val, y_val)

# Further Testing Against MCTS (Placeholder)
def test_against_mcts(model):
    # Implement MCTS testing logic here
    pass
test_against_mcts(cnn_model)
test_against_mcts(transformer_model)


[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 686us/step - accuracy: 0.4775 - loss: 1.3569
Validation Loss: 1.3543078899383545, Validation Accuracy: 0.4690828025341034
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3771 - loss: 1.6158
Validation Loss: 1.6246517896652222, Validation Accuracy: 0.3711651563644409
