In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import cv2

# Dataset location: this assumes the notebook is run from the directory that
# contains the 'input' folder, because the class paths below are relative.
input_dir = os.path.join(os.getcwd(), 'input')

# One folder per chess piece. A folder's position in this list is the integer
# label later assigned to its images.
classes = [
    'input/' + folder
    for folder in (
        'Rook-resize',
        'pawn_resized',
        'knight-resize',
        'Queen-Resized',
        'bishop_resized',
    )
]




In [2]:
def feature_builder(clss):
    """Load every entry of a class folder as a normalised grayscale image.

    Args:
        clss: Path of the directory whose entries are read as images.

    Returns:
        list: One array per directory entry, in ``os.listdir`` order. Each
        image is read as grayscale, resized to (100, 100) and divided by
        255.0.

    Raises:
        ValueError: If an entry of the directory cannot be read as an image.
    """
    lst = []
    for img in os.listdir(clss):
        path = os.path.join(clss, img)
        f = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if f is None:
            # cv2.imread reports failure by returning None instead of raising,
            # which would otherwise surface later as an obscure resize error.
            # Raising (rather than skipping the file) keeps the number of
            # returned images equal to the number of directory entries, which
            # the label-building cell relies on via len(os.listdir(...)).
            raise ValueError(f"Could not read image file: {path}")
        f = cv2.resize(f, (100, 100))
        lst.append(f / 255.0)
    return lst

In [3]:
# Gather the images of every class folder into one flat list, following the
# order of `classes`.
features = [image for folder in classes for image in feature_builder(folder)]

In [4]:
# Stack the images into one array and add a trailing channel axis of size 1.
X = np.array(features).reshape(-1, 100, 100, 1)

In [5]:
# Integer label per image: the index of the class folder, repeated once for
# every entry that folder contains.
labels = [
    class_index
    for class_index, folder in enumerate(classes)
    for _ in range(len(os.listdir(folder)))
]

In [6]:
# One-hot encode the integer labels. The width is pinned to len(classes)
# instead of being inferred from the largest label present, so Y keeps one
# column per class even if the last class folder(s) contributed no images.
Y = to_categorical(np.array(labels), num_classes=len(classes))

In [7]:
# Hold out 20% of the samples for testing. A fixed random_state makes the
# shuffle, and therefore the split, identical on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, train_size=0.8, random_state=42
)

In [12]:
# Patch creation for Vision Transformer
class Patches(layers.Layer):
    """Layer that cuts each image into non-overlapping square patches.

    Args:
        patch_size: Side length of each square patch; also used as the stride,
            so patches do not overlap.
        **kwargs: Standard Keras ``Layer`` keyword arguments (e.g. ``name``),
            forwarded to the base class.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Extract patches and flatten each one into a vector.

        Args:
            images: Batch of images; assumed to be laid out as
                (batch, height, width, channels), the layout
                ``tf.image.extract_patches`` expects.

        Returns:
            Tensor reshaped to ``[batch_size, -1, patch_dims]``, where
            ``patch_dims`` is the last dimension produced by
            ``tf.image.extract_patches``.
        """
        # Dynamic batch size, so the reshape works for any batch length.
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding='VALID',
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        """Return the layer config, including ``patch_size``, for serialization."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config

In [13]:
# Patch encoding for Vision Transformer
class PatchEncoder(layers.Layer):
    """Layer that linearly projects patches and adds a learned position embedding.

    Args:
        num_patches: Number of patch positions; sizes the embedding table and
            the range of position indices.
        projection_dim: Output width of both the Dense projection and the
            position embedding.
        **kwargs: Standard Keras ``Layer`` keyword arguments (e.g. ``name``),
            forwarded to the base class.
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        # Kept so that get_config can reproduce the constructor arguments.
        self.projection_dim = projection_dim
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        """Project ``patch`` and add the embedding of positions 0..num_patches-1."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        # The position embeddings have no batch axis; the addition relies on
        # broadcasting against the projected patches.
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        """Return the layer config, including the constructor arguments, for serialization."""
        config = super().get_config()
        config.update(
            {
                "num_patches": self.num_patches,
                "projection_dim": self.projection_dim,
            }
        )
        return config

In [None]:
# MLP block builder.
# NOTE: this cell must be executed before the model is built, because
# `create_vit_classifier` calls `mlp`.
def mlp(x, hidden_units, dropout_rate):
    """Apply a GELU Dense layer followed by Dropout for each entry of `hidden_units`.

    Args:
        x: Input tensor fed to the first Dense layer.
        hidden_units: Iterable of Dense layer widths, applied in order.
        dropout_rate: Rate passed to every Dropout layer.

    Returns:
        The output of the last Dropout layer (or `x` unchanged if
        `hidden_units` is empty).
    """
    out = x
    for width in hidden_units:
        dense = layers.Dense(width, activation=tf.nn.gelu)
        out = dense(out)
        drop = layers.Dropout(dropout_rate)
        out = drop(out)
    return out

In [14]:
# Vision Transformer (ViT) model builder
def create_vit_classifier():
    """Assemble the ViT classifier as a Keras functional model.

    Reads the module-level hyperparameters `patch_size`, `num_patches`,
    `projection_dim`, `transformer_layers`, `num_heads`, `transformer_units`
    and `mlp_head_units` when called, and uses the `Patches`, `PatchEncoder`
    and `mlp` helpers.

    Returns:
        tf.keras.Model: Maps a (100, 100, 1) input to 5 output scores from a
        final Dense layer with no activation.
    """
    image_shape = (100, 100, 1)
    class_count = 5

    inputs = layers.Input(shape=image_shape)

    # Split the image into patches, then project them and add position information
    patch_sequence = Patches(patch_size)(inputs)
    tokens = PatchEncoder(num_patches, projection_dim)(patch_sequence)

    # Stack of Transformer blocks
    for _ in range(transformer_layers):
        # Self-attention on the normalised tokens, with a residual connection
        normed = layers.LayerNormalization(epsilon=1e-6)(tokens)
        self_attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )
        attended = self_attention(normed, normed)
        residual = layers.Add()([attended, tokens])

        # Feed-forward MLP on the normalised result, with a second residual connection
        feed_forward = layers.LayerNormalization(epsilon=1e-6)(residual)
        feed_forward = mlp(feed_forward, hidden_units=transformer_units, dropout_rate=0.1)
        tokens = layers.Add()([feed_forward, residual])

    # Classification head: normalise, flatten all patch tokens into one vector
    # per sample, then regularise and classify
    head = layers.LayerNormalization(epsilon=1e-6)(tokens)
    head = layers.Flatten()(head)
    head = layers.Dropout(0.5)(head)
    head = mlp(head, hidden_units=mlp_head_units, dropout_rate=0.5)
    logits = layers.Dense(class_count)(head)

    return tf.keras.Model(inputs=inputs, outputs=logits)

In [15]:
# Hyperparameters
patch_size = 6  # Side length of the square patches extracted from the input images
num_patches = (100 // patch_size) ** 2  # Patches per image; 100 matches the 100x100 model input
projection_dim = 64
num_heads = 4
transformer_layers = 8
# Widths of the dense layers inside each transformer block's MLP
transformer_units = [projection_dim * 2, projection_dim]
# Widths of the dense layers of the final classifier
mlp_head_units = [2048, 1024]

In [16]:
# Build the model and configure it for training
vit_classifier = create_vit_classifier()

adam = tf.keras.optimizers.Adam(learning_rate=1e-3)
# from_logits=True because the model's final Dense layer applies no activation
crossentropy = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

vit_classifier.compile(optimizer=adam, loss=crossentropy, metrics=["accuracy"])

NameError: name 'mlp' is not defined

In [None]:
# Train the model, holding out 10% of the training data for validation
history = vit_classifier.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.1,
)