In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split

# ---------- CONFIG ----------
DATA_DIR = "quickData"         # folder holding one "<class name>.npy" file per class
SAMPLES_PER_CLASS = 10000      # number of leading rows kept from each class file
IMG_SIZE = 28                  # every row is reshaped to IMG_SIZE x IMG_SIZE x 1

target_classes = [
    'alarm clock', 'apple', 'arm', 'axe', 'banana', 'baseball bat', 'bed', 'bench', 'book', 'broom',
    'bucket', 'bus', 'butterfly', 'cactus', 'campfire', 'candle', 'cannon', 'car', 'carrot', 'circle',
    'clock', 'cloud', 'compass', 'cookie', 'crayon', 'crown', 'diamond', 'donut', 'dumbbell', 'envelope',
    'fan', 'flashlight', 'foot', 'fork', 'hammer', 'hand', 'hourglass', 'knife', 'ladder', 'leaf',
    'light bulb', 'lollipop', 'moon', 'mountain', 'mushroom', 'nail', 'pencil', 'pizza', 'smiley face',
    'square', 'star', 'stethoscope', 'sun', 'toothbrush', 'tree', 'triangle', 'umbrella', 'wheel',
    'windmill', 'wine bottle'
]

NUM_CLASSES = len(target_classes)

# File names are derived from the class names; the integer label of a class is
# its position in target_classes.
selected_files = [cls + ".npy" for cls in target_classes]
class_names = target_classes

# ---------- LOAD DATA ----------
images = []
labels = []

for class_index, file_name in enumerate(selected_files):
    path = os.path.join(DATA_DIR, file_name)
    if not os.path.exists(path):
        raise FileNotFoundError(f"Missing file: {file_name} in {DATA_DIR}")

    # Memory-map the file and copy out only the rows that are kept. A plain
    # np.load() reads the whole file, and a slice of that array is a view that
    # keeps the full array alive for as long as it sits in `images`.
    data = np.load(path, mmap_mode="r")
    selected_data = np.array(data[:SAMPLES_PER_CLASS])
    images.append(selected_data)
    # len(selected_data) may be smaller than SAMPLES_PER_CLASS for a short file.
    labels.extend([class_index] * len(selected_data))

# ---------- FORMAT & NORMALIZE ----------
# assumes each row holds IMG_SIZE * IMG_SIZE values in 0..255 — implied by the
# reshape and the division below; TODO confirm against the data files.
X = np.vstack(images).reshape(-1, IMG_SIZE, IMG_SIZE, 1).astype('float32')
X /= 255.0  # in place: avoids allocating a second full-size float32 array
y = np.array(labels)

# ---------- SPLIT DATA ----------
# 80 % train, then the remaining 20 % is halved into validation and test.
# Both splits are stratified on the label and use a fixed random_state.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

# ---------- OUTPUT SHAPES ----------
print(f"Loaded {NUM_CLASSES} classes.")
print(f"Train shape: {X_train.shape}, Labels: {y_train.shape}")
print(f"Val shape: {X_val.shape}, Labels: {y_val.shape}")
print(f"Test shape: {X_test.shape}, Labels: {y_test.shape}")


Loaded 60 classes.
Train shape: (480000, 28, 28, 1), Labels: (480000,)
Val shape: (60000, 28, 28, 1), Labels: (60000,)
Test shape: (60000, 28, 28, 1), Labels: (60000,)


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling repeat across
# "Restart & Run All" executions (same value as the split's random_state).
# NOTE(review): bit-exact results on GPU may additionally require
# tf.config.experimental.enable_op_determinism() — confirm if that is needed.
tf.keras.utils.set_random_seed(42)

# One-hot encode labels: every model below is compiled with
# categorical_crossentropy, which expects one-hot targets.
y_train_cat = to_categorical(y_train, num_classes=NUM_CLASSES)
y_val_cat = to_categorical(y_val, num_classes=NUM_CLASSES)
y_test_cat = to_categorical(y_test, num_classes=NUM_CLASSES)

# Maps a human-readable model label to its test accuracy; filled in by each
# training cell and written to CSV in the last cell.
model_accuracies = {}

CNN v1: Baseline CNN

In [3]:
# Baseline CNN: two Conv -> MaxPool stages followed by a small dense classifier.
model_cnn_v1 = Sequential([
    Input(shape=(28, 28, 1)),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(rate=0.3),
    Dense(units=NUM_CLASSES, activation='softmax'),
])

# ---------- COMPILE ----------
# Targets are the one-hot y_*_cat arrays, hence categorical cross-entropy.
model_cnn_v1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_cnn_v1.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=10,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_cnn_v1.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['Baseline CNN (v1)'] = test_acc
print(f"\n Baseline CNN (v1): {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_cnn_v1.save('quickdraw_cnn_model.keras')
print("Model saved to 'quickdraw_cnn_model.keras'")


Epoch 1/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 9ms/step - accuracy: 0.5560 - loss: 1.7141 - val_accuracy: 0.7866 - val_loss: 0.8055
Epoch 2/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 9ms/step - accuracy: 0.7604 - loss: 0.9169 - val_accuracy: 0.8154 - val_loss: 0.6937
Epoch 3/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 9ms/step - accuracy: 0.7902 - loss: 0.8013 - val_accuracy: 0.8299 - val_loss: 0.6458
Epoch 4/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 9ms/step - accuracy: 0.8049 - loss: 0.7392 - val_accuracy: 0.8345 - val_loss: 0.6228
Epoch 5/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 9ms/step - accuracy: 0.8133 - loss: 0.7054 - val_accuracy: 0.8389 - val_loss: 0.6060
Epoch 6/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 9ms/step - accuracy: 0.8202 - loss: 0.6742 - val_accuracy: 0.8423 - val_loss: 0.5938
Epoch 7/10

CNN v2: Deep CNN

In [None]:
# Deeper CNN: three Conv -> MaxPool stages, a wider dense layer (256 units)
# and a higher dropout rate (0.4) than the baseline.
model_cnn_v2 = Sequential([
    Input(shape=(28, 28, 1)),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(units=256, activation='relu'),
    Dropout(rate=0.4),
    Dense(units=NUM_CLASSES, activation='softmax'),
])

# ---------- COMPILE ----------
model_cnn_v2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_cnn_v2.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=10,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_cnn_v2.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['Deeper CNN (v2)'] = test_acc
print(f"\n Deeper CNN (v2): {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_cnn_v2.save('quickdraw_cnn_deep_model.keras')
print("Model saved to 'quickdraw_cnn_deep_model.keras'")


Epoch 1/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 10ms/step - accuracy: 0.5000 - loss: 1.9004 - val_accuracy: 0.7489 - val_loss: 0.9288
Epoch 2/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 10ms/step - accuracy: 0.7343 - loss: 0.9978 - val_accuracy: 0.7794 - val_loss: 0.8066
Epoch 3/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 10ms/step - accuracy: 0.7662 - loss: 0.8710 - val_accuracy: 0.7925 - val_loss: 0.7625
Epoch 4/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 10ms/step - accuracy: 0.7848 - loss: 0.8019 - val_accuracy: 0.8040 - val_loss: 0.7283
Epoch 5/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 10ms/step - accuracy: 0.7935 - loss: 0.7678 - val_accuracy: 0.8094 - val_loss: 0.7034
Epoch 6/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 10ms/step - accuracy: 0.8012 - loss: 0.7416 - val_accuracy: 0.8104 - val_loss: 0.6958
Epoc

CNN v3: Deep CNN + SGD + More Epochs

In [None]:
# Same three-stage architecture as the deeper CNN; this variant differs in the
# optimizer (SGD with Nesterov momentum) and the number of epochs (20).
model_cnn_v3 = Sequential([
    Input(shape=(28, 28, 1)),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(units=256, activation='relu'),
    Dropout(rate=0.4),
    Dense(units=NUM_CLASSES, activation='softmax'),
])

# ---------- COMPILE ----------
model_cnn_v3.compile(
    optimizer=SGD(learning_rate=0.01, momentum=0.9, nesterov=True),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_cnn_v3.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=20,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_cnn_v3.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['Deep CNN + SGD (v3)'] = test_acc
print(f"\n Deep CNN + SGD (v3): {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_cnn_v3.save('quickdraw_cnn_sgd_model.keras')
print("Model saved to 'quickdraw_cnn_sgd_model.keras'")


Epoch 1/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 10ms/step - accuracy: 0.3700 - loss: 2.4716 - val_accuracy: 0.7206 - val_loss: 1.0436
Epoch 2/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 10ms/step - accuracy: 0.6982 - loss: 1.1265 - val_accuracy: 0.7635 - val_loss: 0.8744
Epoch 3/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 10ms/step - accuracy: 0.7409 - loss: 0.9694 - val_accuracy: 0.7840 - val_loss: 0.8061
Epoch 4/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 10ms/step - accuracy: 0.7626 - loss: 0.8852 - val_accuracy: 0.7906 - val_loss: 0.7706
Epoch 5/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 10ms/step - accuracy: 0.7764 - loss: 0.8341 - val_accuracy: 0.8006 - val_loss: 0.7362
Epoch 6/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 10ms/step - accuracy: 0.7853 - loss: 0.8004 - val_accuracy: 0.8016 - val_loss: 0.7270
Epoc

CNN v4: Batch Normalization + Adam

In [None]:
from tensorflow.keras.layers import BatchNormalization

# Baseline two-stage CNN with a BatchNormalization layer inserted after each
# convolution (i.e. after the ReLU activation, before pooling).
model_cnn_v4 = Sequential([
    Input(shape=(28, 28, 1)),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(rate=0.3),
    Dense(units=NUM_CLASSES, activation='softmax'),
])

# ---------- COMPILE ----------
model_cnn_v4.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_cnn_v4.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=10,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_cnn_v4.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['BatchNorm CNN (v4)'] = test_acc
print(f"\n BatchNorm CNN (v4): {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_cnn_v4.save('quickdraw_cnn_batchnorm_model.keras')
print("Model saved to 'quickdraw_cnn_batchnorm_model.keras'")


Epoch 1/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 26ms/step - accuracy: 0.6130 - loss: 1.5111 - val_accuracy: 0.8082 - val_loss: 0.7190
Epoch 2/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 22ms/step - accuracy: 0.7877 - loss: 0.8001 - val_accuracy: 0.8278 - val_loss: 0.6461
Epoch 3/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 23ms/step - accuracy: 0.8102 - loss: 0.7147 - val_accuracy: 0.8360 - val_loss: 0.6205
Epoch 4/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 23ms/step - accuracy: 0.8221 - loss: 0.6692 - val_accuracy: 0.8426 - val_loss: 0.5885
Epoch 5/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 23ms/step - accuracy: 0.8294 - loss: 0.6344 - val_accuracy: 0.8456 - val_loss: 0.5831
Epoch 6/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 23ms/step - accuracy: 0.8353 - loss: 0.6133 - val_accuracy: 0.8467 - val_loss: 0.5814
Epoc

ViT v1: Baseline ViT

In [None]:
from tensorflow.keras import layers, models

# ---------- SETTINGS ----------
# Hyper-parameters for the ViT cells. These are notebook-level globals: the ViT
# model builders below read embed_dim, num_heads, ff_dim and
# num_transformer_blocks directly instead of receiving them as arguments.
# NOTE(review): the "Hybrid v1" cell later rebinds embed_dim, num_heads, ff_dim
# and num_transformer_blocks, so re-running a ViT cell after that cell builds a
# different model than a top-to-bottom run does.
# Shape of one input image; passed to layers.Input by the builders.
input_shape = (28, 28, 1)  
# Side length, in pixels, of the square patches each image is cut into.
patch_size = 4             
# Patches per image. Only input_shape[0] is used, i.e. a square image is
# assumed; with the values above this is 7 * 7 = 49.
num_patches = (input_shape[0] // patch_size) ** 2
# Width of each patch embedding (also passed as key_dim to MultiHeadAttention).
embed_dim = 64
# Number of attention heads in every encoder block.
num_heads = 4
# Hidden width of the feed-forward sub-layer in every encoder block.
ff_dim = 128
# Number of stacked encoder blocks in the baseline, v3 and v4 ViT builders.
num_transformer_blocks = 4

# ---------- Patch + Position Embedding ----------
class PatchEncoder(layers.Layer):
    """Project flattened patches to `embed_dim` and add a learned position embedding.

    Args:
        num_patches: Number of patches (tokens) per image. Sizes the position
            embedding table and the range of position indices used in `call`.
        embed_dim: Output width of the patch projection and position embedding.
        **kwargs: Standard Keras layer arguments (e.g. name, dtype, trainable),
            forwarded to the base class so the layer can be rebuilt from the
            config stored in a saved model.
    """

    def __init__(self, num_patches, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Kept on the instance so call() and get_config() do not depend on
        # notebook-level globals.
        self.num_patches = num_patches
        self.embed_dim = embed_dim
        self.projection = layers.Dense(embed_dim)
        self.position_embedding = layers.Embedding(input_dim=num_patches, output_dim=embed_dim)

    def call(self, patch_embeddings):
        """Return projected patches plus their position embeddings.

        assumes patch_embeddings is (batch, num_patches, patch_dims), as
        produced by Patcher — TODO confirm for other callers.
        """
        # Use this layer's own patch count; the position embedding (one row per
        # position) is broadcast across the batch by the addition.
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        embedded = self.projection(patch_embeddings) + self.position_embedding(positions)
        return embedded

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        config.update({"num_patches": self.num_patches, "embed_dim": self.embed_dim})
        return config

class Patcher(layers.Layer):
    """Cut each image into non-overlapping square patches and flatten them.

    Args:
        patch_size: Side length, in pixels, of each square patch. Also used as
            the stride, so patches do not overlap.
        **kwargs: Standard Keras layer arguments (e.g. name, dtype, trainable),
            forwarded to the base class so the layer can be rebuilt from the
            config stored in a saved model.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return patches reshaped to (batch, number_of_patches, values_per_patch)."""
        batch_size = tf.shape(images)[0]
        # Use the instance's patch size, not the notebook-level global, so the
        # layer honours the value it was constructed with.
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding='VALID'  # rows/columns that do not fill a whole patch are dropped
        )
        # extract_patches returns a grid of patches; collapse the grid
        # dimensions into a single sequence dimension.
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config

# ---------- Transformer Encoder Block ----------
def transformer_encoder(inputs, embed_dim, num_heads, ff_dim):
    """Apply one pre-norm transformer encoder block to a token sequence.

    The block is: LayerNorm -> self-attention -> residual add, then
    LayerNorm -> Dense(ff_dim, relu) -> Dense(embed_dim) -> residual add.

    Args:
        inputs: Token tensor; its last dimension must equal `embed_dim` for the
            residual additions to line up.
        embed_dim: Token width; also passed as `key_dim` to the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward sub-layer.

    Returns:
        Tensor produced by the second residual addition.
    """
    # NOTE(review): key_dim is the per-head size in Keras MultiHeadAttention, so
    # each head is embed_dim wide here rather than embed_dim // num_heads —
    # confirm this is intended.
    normed_tokens = layers.LayerNormalization()(inputs)
    self_attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
    attended = self_attention(normed_tokens, normed_tokens)
    attn_residual = layers.Add()([attended, inputs])

    # Position-wise feed-forward sub-layer on the normalised attention output.
    normed_residual = layers.LayerNormalization()(attn_residual)
    hidden = layers.Dense(units=ff_dim, activation='relu')(normed_residual)
    projected = layers.Dense(units=embed_dim)(hidden)
    return layers.Add()([attn_residual, projected])

# ---------- ViT Model ----------
def build_vit_model(input_shape, patch_size, num_classes):
    """Build the baseline Vision Transformer classifier (uncompiled).

    Pipeline: Patcher -> PatchEncoder -> `num_transformer_blocks` encoder
    blocks -> LayerNorm -> global average pooling over tokens ->
    Dense(128, relu) -> Dropout(0.3) -> softmax.

    embed_dim, num_heads, ff_dim and num_transformer_blocks are read from the
    notebook-level settings.

    Args:
        input_shape: (height, width, channels) of one input image.
        patch_size: Side length of the square, non-overlapping patches.
        num_classes: Number of output classes.

    Returns:
        A Keras functional Model mapping images to class probabilities.
    """
    # Derive the token count from this call's own arguments rather than the
    # global num_patches, so the position-embedding table always matches the
    # patches Patcher produces (VALID padding keeps floor(size / patch) per axis).
    patches_per_image = (input_shape[0] // patch_size) * (input_shape[1] // patch_size)

    inputs = layers.Input(shape=input_shape)
    x = Patcher(patch_size)(inputs)
    x = PatchEncoder(num_patches=patches_per_image, embed_dim=embed_dim)(x)

    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, embed_dim, num_heads, ff_dim)

    # Classification head: average the tokens, then a small MLP.
    x = layers.LayerNormalization()(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs, outputs)
    return model

# ---------- COMPILE ----------
model_vit = build_vit_model(input_shape, patch_size, NUM_CLASSES)
model_vit.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print the layer-by-layer architecture before training.
model_vit.summary()

# ---------- TRAIN ----------
history = model_vit.fit(X_train, y_train_cat,
                    validation_data=(X_val, y_val_cat),
                    epochs=10,
                    batch_size=128)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_vit.evaluate(X_test, y_test_cat, verbose=0)
model_accuracies['Baseline ViT (v1)'] = test_acc
print(f"\n Baseline ViT (v1): {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_vit.save('quickdraw_vit.keras')
# The message names the same path that save() wrote above.
print("Model saved to 'quickdraw_vit.keras'")




Epoch 1/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m346s[0m 90ms/step - accuracy: 0.3353 - loss: 2.4867 - val_accuracy: 0.6202 - val_loss: 1.3826
Epoch 2/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m334s[0m 89ms/step - accuracy: 0.6239 - loss: 1.3828 - val_accuracy: 0.6780 - val_loss: 1.1474
Epoch 3/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m337s[0m 90ms/step - accuracy: 0.6827 - loss: 1.1653 - val_accuracy: 0.7208 - val_loss: 1.0063
Epoch 4/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m340s[0m 91ms/step - accuracy: 0.7177 - loss: 1.0444 - val_accuracy: 0.7454 - val_loss: 0.9232
Epoch 5/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m340s[0m 91ms/step - accuracy: 0.7421 - loss: 0.9538 - val_accuracy: 0.7637 - val_loss: 0.8614
Epoch 6/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m341s[0m 91ms/step - accuracy: 0.7590 - loss: 0.8906 - val_accuracy: 0.7778 - val_loss: 0.803

ViT v2: Deeper ViT

In [None]:
def build_vit_model_v2(input_shape, patch_size, num_classes):
    """Build the deeper ViT variant with six encoder blocks (uncompiled).

    Pipeline: Patcher -> PatchEncoder -> 6 encoder blocks -> LayerNorm ->
    global average pooling over tokens -> Dense(128, relu) -> Dropout(0.3) ->
    softmax.

    embed_dim, num_heads and ff_dim are read from the notebook-level settings.

    Args:
        input_shape: (height, width, channels) of one input image.
        patch_size: Side length of the square, non-overlapping patches.
        num_classes: Number of output classes.

    Returns:
        A Keras functional Model mapping images to class probabilities.
    """
    # Derive the token count from this call's own arguments rather than the
    # global num_patches, so the position-embedding table always matches the
    # patches Patcher produces (VALID padding keeps floor(size / patch) per axis).
    patches_per_image = (input_shape[0] // patch_size) * (input_shape[1] // patch_size)

    inputs = layers.Input(shape=input_shape)
    x = Patcher(patch_size)(inputs)
    x = PatchEncoder(num_patches=patches_per_image, embed_dim=embed_dim)(x)

    # Six blocks, versus num_transformer_blocks in the baseline builder.
    for _ in range(6):
        x = transformer_encoder(x, embed_dim, num_heads, ff_dim)

    # Classification head: average the tokens, then a small MLP.
    x = layers.LayerNormalization()(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs, outputs)
    return model

# ---------- COMPILE ----------
# Targets are the one-hot y_*_cat arrays, hence categorical cross-entropy.
model_vit_v2 = build_vit_model_v2(input_shape, patch_size, NUM_CLASSES)
model_vit_v2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_vit_v2.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=10,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_vit_v2.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['Deeper ViT (v2)'] = test_acc
print(f"\n Deeper ViT (v2): {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_vit_v2.save('quickdraw_vit_deep.keras')
print("Model saved to 'quickdraw_vit_deep.keras'")


Epoch 1/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m588s[0m 154ms/step - accuracy: 0.3060 - loss: 2.6009 - val_accuracy: 0.6155 - val_loss: 1.3952
Epoch 2/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m506s[0m 135ms/step - accuracy: 0.6226 - loss: 1.3937 - val_accuracy: 0.6973 - val_loss: 1.0899
Epoch 3/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m511s[0m 136ms/step - accuracy: 0.7034 - loss: 1.1005 - val_accuracy: 0.7435 - val_loss: 0.9307
Epoch 4/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m511s[0m 136ms/step - accuracy: 0.7423 - loss: 0.9539 - val_accuracy: 0.7686 - val_loss: 0.8474
Epoch 5/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m514s[0m 137ms/step - accuracy: 0.7638 - loss: 0.8757 - val_accuracy: 0.7861 - val_loss: 0.7777
Epoch 6/10
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m517s[0m 138ms/step - accuracy: 0.7811 - loss: 0.8163 - val_accuracy: 0.7873 - val_loss:

ViT v3: SGD + More Epochs

In [None]:
def build_vit_model_v3(input_shape, patch_size, num_classes):
    """Build the ViT used for the SGD experiment (uncompiled).

    The architecture matches the baseline ViT; the experiment differs only in
    how the returned model is compiled and trained by the calling cell.

    Pipeline: Patcher -> PatchEncoder -> `num_transformer_blocks` encoder
    blocks -> LayerNorm -> global average pooling over tokens ->
    Dense(128, relu) -> Dropout(0.3) -> softmax.

    embed_dim, num_heads, ff_dim and num_transformer_blocks are read from the
    notebook-level settings.

    Args:
        input_shape: (height, width, channels) of one input image.
        patch_size: Side length of the square, non-overlapping patches.
        num_classes: Number of output classes.

    Returns:
        A Keras functional Model mapping images to class probabilities.
    """
    # Derive the token count from this call's own arguments rather than the
    # global num_patches, so the position-embedding table always matches the
    # patches Patcher produces (VALID padding keeps floor(size / patch) per axis).
    patches_per_image = (input_shape[0] // patch_size) * (input_shape[1] // patch_size)

    inputs = layers.Input(shape=input_shape)
    x = Patcher(patch_size)(inputs)
    x = PatchEncoder(num_patches=patches_per_image, embed_dim=embed_dim)(x)

    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, embed_dim, num_heads, ff_dim)

    # Classification head: average the tokens, then a small MLP.
    x = layers.LayerNormalization()(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs, outputs)
    return model

# ---------- COMPILE ----------
# This experiment swaps Adam for SGD with Nesterov momentum.
model_vit_v3 = build_vit_model_v3(input_shape, patch_size, NUM_CLASSES)
model_vit_v3.compile(
    optimizer=SGD(learning_rate=0.01, momentum=0.9, nesterov=True),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_vit_v3.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=20,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_vit_v3.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['ViT + SGD + More Epochs (v3)'] = test_acc
print(f"\n ViT + SGD + More Epochs (v3): {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_vit_v3.save('quickdraw_vit_sgd.keras')
print("Model saved to 'quickdraw_vit_sgd.keras'")


Epoch 1/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m369s[0m 97ms/step - accuracy: 0.2078 - loss: 3.0065 - val_accuracy: 0.5883 - val_loss: 1.4929
Epoch 2/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m336s[0m 90ms/step - accuracy: 0.6112 - loss: 1.4386 - val_accuracy: 0.7116 - val_loss: 1.0486
Epoch 3/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 90ms/step - accuracy: 0.6959 - loss: 1.1225 - val_accuracy: 0.7352 - val_loss: 0.9646
Epoch 4/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m341s[0m 91ms/step - accuracy: 0.7304 - loss: 1.0066 - val_accuracy: 0.7560 - val_loss: 0.8902
Epoch 5/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 92ms/step - accuracy: 0.7467 - loss: 0.9425 - val_accuracy: 0.7718 - val_loss: 0.8357
Epoch 6/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m345s[0m 92ms/step - accuracy: 0.7587 - loss: 0.8975 - val_accuracy: 0.7769 - val_loss: 0.810

ViT v4: Wider Embedding

In [None]:
def build_vit_model_v4(input_shape, patch_size, num_classes):
    """Build the wider ViT variant (uncompiled).

    Uses a 128-wide embedding, a 256-wide feed-forward layer and 8 attention
    heads; the number of encoder blocks is read from the notebook-level
    `num_transformer_blocks`.

    Args:
        input_shape: (height, width, channels) of one input image; only the
            height is used to count patches, i.e. a square image is assumed.
        patch_size: Side length of the square, non-overlapping patches.
        num_classes: Number of output classes.

    Returns:
        A Keras functional Model mapping images to class probabilities.
    """
    # Widths specific to this variant.
    wide_embed, wide_ff, wide_heads = 128, 256, 8

    image_in = layers.Input(shape=input_shape)

    # Token count computed locally from the arguments.
    patches_per_side = input_shape[0] // patch_size
    token_count = patches_per_side ** 2

    tokens = Patcher(patch_size)(image_in)
    tokens = PatchEncoder(num_patches=token_count, embed_dim=wide_embed)(tokens)

    for _block in range(num_transformer_blocks):
        tokens = transformer_encoder(tokens, wide_embed, wide_heads, wide_ff)

    # Classification head: average the tokens, then a small MLP.
    pooled = layers.LayerNormalization()(tokens)
    pooled = layers.GlobalAveragePooling1D()(pooled)
    hidden = layers.Dense(units=128, activation='relu')(pooled)
    hidden = layers.Dropout(rate=0.3)(hidden)
    class_probs = layers.Dense(units=num_classes, activation='softmax')(hidden)

    return models.Model(image_in, class_probs)

# ---------- COMPILE ----------
# Targets are the one-hot y_*_cat arrays, hence categorical cross-entropy.
model_vit_v4 = build_vit_model_v4(input_shape, patch_size, NUM_CLASSES)
model_vit_v4.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_vit_v4.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=20,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_vit_v4.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['ViT Wider Embed + Dropout (v4)'] = test_acc
print(f"\n ViT Wider Embed + Dropout (v4): {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_vit_v4.save('quickdraw_vit_wide.keras')
print("Model saved to 'quickdraw_vit_wide.keras'")


Epoch 1/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m624s[0m 165ms/step - accuracy: 0.3465 - loss: 2.4716 - val_accuracy: 0.6537 - val_loss: 1.2629
Epoch 2/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m592s[0m 158ms/step - accuracy: 0.6412 - loss: 1.3210 - val_accuracy: 0.7144 - val_loss: 1.0548
Epoch 3/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m595s[0m 159ms/step - accuracy: 0.6947 - loss: 1.1285 - val_accuracy: 0.7416 - val_loss: 0.9485
Epoch 4/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m599s[0m 160ms/step - accuracy: 0.7246 - loss: 1.0209 - val_accuracy: 0.7619 - val_loss: 0.8735
Epoch 5/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m600s[0m 160ms/step - accuracy: 0.7457 - loss: 0.9437 - val_accuracy: 0.7724 - val_loss: 0.8309
Epoch 6/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m601s[0m 160ms/step - accuracy: 0.7594 - loss: 0.8893 - val_accuracy: 0.7782 - val_loss:

Hybrid v1: Baseline CNN + Transformer

In [None]:
# ---------- SETTINGS ----------
# Hyper-parameters read as globals by build_hybrid_model below.
# NOTE(review): embed_dim, num_heads, ff_dim and num_transformer_blocks are the
# same names the ViT settings cell defines (64 / 4 / 128 / 4 there) and the ViT
# builders read them as globals, so re-running a ViT cell after this one builds
# a different model than a top-to-bottom run does.
# Shape of one input image; passed to layers.Input by the hybrid builders.
input_shape = (28, 28, 1)
# Width of the single token the CNN features are projected to.
embed_dim = 128
# Number of attention heads in every encoder block.
num_heads = 4
# Hidden width of the feed-forward sub-layer in every encoder block.
ff_dim = 256
# Number of stacked encoder blocks in build_hybrid_model.
num_transformer_blocks = 2

# ---------- Transformer Encoder Block ----------
def transformer_encoder(inputs, embed_dim, num_heads, ff_dim):
    """Apply one pre-norm transformer encoder block to a token sequence.

    This cell redefines the function with the same layer sequence as the
    definition in the ViT cell: LayerNorm -> self-attention -> residual add,
    then LayerNorm -> Dense(ff_dim, relu) -> Dense(embed_dim) -> residual add.

    Args:
        inputs: Token tensor; its last dimension must equal `embed_dim` for the
            residual additions to line up.
        embed_dim: Token width; also passed as `key_dim` to the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward sub-layer.

    Returns:
        Tensor produced by the second residual addition.
    """
    # NOTE(review): key_dim is the per-head size in Keras MultiHeadAttention, so
    # each head is embed_dim wide here rather than embed_dim // num_heads —
    # confirm this is intended.
    normed_tokens = layers.LayerNormalization()(inputs)
    self_attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
    attended = self_attention(normed_tokens, normed_tokens)
    attn_residual = layers.Add()([attended, inputs])

    # Position-wise feed-forward sub-layer on the normalised attention output.
    normed_residual = layers.LayerNormalization()(attn_residual)
    hidden = layers.Dense(units=ff_dim, activation='relu')(normed_residual)
    projected = layers.Dense(units=embed_dim)(hidden)
    return layers.Add()([attn_residual, projected])

# ---------- Build Hybrid Model ----------
def build_hybrid_model(input_shape, num_classes):
    """Build the baseline CNN + transformer hybrid (uncompiled).

    Two convolutions (with one max-pool between them) are flattened, projected
    to `embed_dim` and reshaped to a sequence of length 1, which is passed
    through `num_transformer_blocks` encoder blocks and a dense head.

    embed_dim, num_heads, ff_dim and num_transformer_blocks are read from the
    notebook-level settings.

    Args:
        input_shape: (height, width, channels) of one input image.
        num_classes: Number of output classes.

    Returns:
        A Keras functional Model mapping images to class probabilities.
    """
    image_in = layers.Input(shape=input_shape)

    # Convolutional feature extractor.
    feats = layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(image_in)
    feats = layers.MaxPooling2D(pool_size=(2, 2))(feats)
    feats = layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(feats)
    feats = layers.Flatten()(feats)

    # NOTE(review): the whole feature map becomes ONE token, so self-attention
    # in the encoder blocks attends over a single position — confirm intended.
    token = layers.Dense(units=embed_dim)(feats)
    token = layers.Reshape(target_shape=(1, embed_dim))(token)

    for _block in range(num_transformer_blocks):
        token = transformer_encoder(token, embed_dim, num_heads, ff_dim)

    # Classification head.
    head = layers.LayerNormalization()(token)
    head = layers.Flatten()(head)
    head = layers.Dense(units=128, activation='relu')(head)
    head = layers.Dropout(rate=0.3)(head)
    class_probs = layers.Dense(units=num_classes, activation='softmax')(head)

    return models.Model(image_in, class_probs)

# ---------- COMPILE ----------
# Targets are the one-hot y_*_cat arrays, hence categorical cross-entropy.
model_hybrid_v1 = build_hybrid_model(input_shape, NUM_CLASSES)
model_hybrid_v1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture before training.
model_hybrid_v1.summary()

# ---------- TRAIN ----------
history = model_hybrid_v1.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=20,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_hybrid_v1.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['Hybrid: 2-Conv + 2-Transformer (Baseline)'] = test_acc
print(f"\n Hybrid: 2-Conv + 2-Transformer (Baseline): {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_hybrid_v1.save('quickdraw_hybrid_cnn_transformer.keras')
print("Model saved to 'quickdraw_hybrid_cnn_transformer.keras'")

Epoch 1/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 32ms/step - accuracy: 0.5845 - loss: 1.6053 - val_accuracy: 0.8152 - val_loss: 0.6781
Epoch 2/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 32ms/step - accuracy: 0.8128 - loss: 0.7098 - val_accuracy: 0.8364 - val_loss: 0.6055
Epoch 3/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 32ms/step - accuracy: 0.8397 - loss: 0.6031 - val_accuracy: 0.8462 - val_loss: 0.5717
Epoch 4/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 32ms/step - accuracy: 0.8573 - loss: 0.5361 - val_accuracy: 0.8528 - val_loss: 0.5462
Epoch 5/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 32ms/step - accuracy: 0.8677 - loss: 0.4915 - val_accuracy: 0.8565 - val_loss: 0.5408
Epoch 6/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 32ms/step - accuracy: 0.8784 - loss: 0.4477 - val_accuracy: 0.8571 - val_loss: 0.541

Hybrid v2: Deep CNN + unchanged transformer

In [None]:
def build_hybrid_model_v2(input_shape, num_classes):
    """Build the hybrid with a three-convolution front end (uncompiled).

    Three convolutions (one max-pool after the first) are flattened, projected
    to 128 values and reshaped to a sequence of length 1, which is passed
    through two encoder blocks (4 heads, feed-forward width 256) and a dense
    head. All sizes are hard-coded in this function.

    Args:
        input_shape: (height, width, channels) of one input image.
        num_classes: Number of output classes.

    Returns:
        A Keras functional Model mapping images to class probabilities.
    """
    image_in = layers.Input(shape=input_shape)

    # Convolutional feature extractor.
    feats = layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(image_in)
    feats = layers.MaxPooling2D(pool_size=(2, 2))(feats)
    feats = layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(feats)
    feats = layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(feats)
    feats = layers.Flatten()(feats)

    # Project to a single 128-wide token.
    token = layers.Dense(units=128)(feats)
    token = layers.Reshape(target_shape=(1, 128))(token)

    for _block in range(2):
        token = transformer_encoder(token, embed_dim=128, num_heads=4, ff_dim=256)

    # Classification head.
    head = layers.LayerNormalization()(token)
    head = layers.Flatten()(head)
    head = layers.Dense(units=128, activation='relu')(head)
    head = layers.Dropout(rate=0.3)(head)
    class_probs = layers.Dense(units=num_classes, activation='softmax')(head)

    hybrid = models.Model(image_in, class_probs)
    return hybrid

# ---------- COMPILE ----------
# Targets are the one-hot y_*_cat arrays, hence categorical cross-entropy.
model_hybrid_v2 = build_hybrid_model_v2(input_shape, NUM_CLASSES)
model_hybrid_v2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_hybrid_v2.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=20,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_hybrid_v2.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['Hybrid: 3-Conv + 2-Transformer'] = test_acc
print(f"\n Hybrid: 3-Conv + 2-Transformer: {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_hybrid_v2.save('quickdraw_hybrid_v2_cnn.keras')
print("Model saved to 'quickdraw_hybrid_v2_cnn.keras'")

Epoch 1/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 38ms/step - accuracy: 0.5934 - loss: 1.5759 - val_accuracy: 0.8124 - val_loss: 0.6875
Epoch 2/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 37ms/step - accuracy: 0.8233 - loss: 0.6707 - val_accuracy: 0.8465 - val_loss: 0.5630
Epoch 3/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 37ms/step - accuracy: 0.8504 - loss: 0.5644 - val_accuracy: 0.8578 - val_loss: 0.5335
Epoch 4/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 37ms/step - accuracy: 0.8661 - loss: 0.5011 - val_accuracy: 0.8618 - val_loss: 0.5147
Epoch 5/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 37ms/step - accuracy: 0.8788 - loss: 0.4488 - val_accuracy: 0.8637 - val_loss: 0.5145
Epoch 6/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 37ms/step - accuracy: 0.8882 - loss: 0.4098 - val_accuracy: 0.8604 - val_loss: 0.541

Hybrid v3: Shallow CNN + Deeper Transformer + Positional Embedding

In [None]:
class PositionalEmbedding(layers.Layer):
    """Add a learned position embedding to a token sequence.

    Args:
        sequence_length: Number of positions in the embedding table.
        embed_dim: Width of each position embedding; must match the last
            dimension of the input for the addition in `call`.
        **kwargs: Standard Keras layer arguments (e.g. name, dtype, trainable),
            forwarded to the base class so the layer can be rebuilt from the
            config stored in a saved model.
    """

    def __init__(self, sequence_length, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Kept on the instance so get_config() can report them.
        self.sequence_length = sequence_length
        self.embed_dim = embed_dim
        self.position_embedding = layers.Embedding(input_dim=sequence_length, output_dim=embed_dim)

    def call(self, x):
        """Return `x` plus the embedding of each position along axis 1."""
        # Positions are taken from the runtime length of axis 1; the embedding
        # is broadcast across the batch by the addition.
        positions = tf.range(start=0, limit=tf.shape(x)[1], delta=1)
        return x + self.position_embedding(positions)

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        config.update({"sequence_length": self.sequence_length, "embed_dim": self.embed_dim})
        return config

def build_hybrid_model_v3(input_shape, num_classes):
    """Build the hybrid with a wider, deeper transformer stage (uncompiled).

    Two convolutions (one max-pool between them) are flattened, projected to
    192 values, reshaped to a sequence of length 1, given a positional
    embedding, and passed through three encoder blocks (6 heads, feed-forward
    width 384) and a dense head. All sizes are hard-coded in this function.

    Args:
        input_shape: (height, width, channels) of one input image.
        num_classes: Number of output classes.

    Returns:
        A Keras functional Model mapping images to class probabilities.
    """
    image_in = layers.Input(shape=input_shape)

    # Convolutional feature extractor.
    feats = layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(image_in)
    feats = layers.MaxPooling2D(pool_size=(2, 2))(feats)
    feats = layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(feats)
    feats = layers.Flatten()(feats)

    # Project to a single 192-wide token.
    token = layers.Dense(units=192)(feats)
    token = layers.Reshape(target_shape=(1, 192))(token)

    # NOTE(review): with a sequence of length 1 there is only one position to
    # embed — confirm the positional embedding is intended here.
    token = PositionalEmbedding(sequence_length=1, embed_dim=192)(token)

    for _block in range(3):
        token = transformer_encoder(token, embed_dim=192, num_heads=6, ff_dim=384)

    # Classification head.
    head = layers.LayerNormalization()(token)
    head = layers.Flatten()(head)
    head = layers.Dense(units=128, activation='relu')(head)
    head = layers.Dropout(rate=0.3)(head)
    class_probs = layers.Dense(units=num_classes, activation='softmax')(head)

    hybrid = models.Model(image_in, class_probs)
    return hybrid

# ---------- COMPILE ----------
# Targets are the one-hot y_*_cat arrays, hence categorical cross-entropy.
model_hybrid_v3 = build_hybrid_model_v3(input_shape, NUM_CLASSES)
model_hybrid_v3.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_hybrid_v3.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=20,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_hybrid_v3.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['Hybrid: 2-Conv + 3-Transformer + PosEnc'] = test_acc
print(f"\n Hybrid: 2-Conv + 3-Transformer + PosEnc: {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_hybrid_v3.save('quickdraw_hybrid_v3_transformer.keras')
print("Model saved to 'quickdraw_hybrid_v3_transformer.keras'")

Epoch 1/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m402s[0m 105ms/step - accuracy: 0.5456 - loss: 1.7586 - val_accuracy: 0.8012 - val_loss: 0.7360
Epoch 2/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m393s[0m 105ms/step - accuracy: 0.8029 - loss: 0.7444 - val_accuracy: 0.8330 - val_loss: 0.6147
Epoch 3/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m392s[0m 105ms/step - accuracy: 0.8351 - loss: 0.6203 - val_accuracy: 0.8459 - val_loss: 0.5706
Epoch 4/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m392s[0m 105ms/step - accuracy: 0.8533 - loss: 0.5462 - val_accuracy: 0.8513 - val_loss: 0.5505
Epoch 5/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m392s[0m 105ms/step - accuracy: 0.8671 - loss: 0.4955 - val_accuracy: 0.8571 - val_loss: 0.5374
Epoch 6/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m392s[0m 105ms/step - accuracy: 0.8780 - loss: 0.4515 - val_accuracy: 0.8586 - val_loss:

Hybrid v4: Deeper CNN + Deeper Transformer + Positional Embedding

In [None]:
def build_hybrid_model_v4(input_shape, num_classes):
    """Build the hybrid with three un-pooled convolutions and three encoder blocks (uncompiled).

    Three consecutive convolutions (no pooling) are flattened, projected to
    192 values, reshaped to a sequence of length 1, given a positional
    embedding, and passed through three encoder blocks (6 heads, feed-forward
    width 384) and a dense head with dropout 0.4. All sizes are hard-coded.

    Args:
        input_shape: (height, width, channels) of one input image.
        num_classes: Number of output classes.

    Returns:
        A Keras functional Model mapping images to class probabilities.
    """
    image_in = layers.Input(shape=input_shape)

    # Convolutional feature extractor; unlike the other hybrids there is no
    # MaxPooling2D between the convolutions.
    feats = layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(image_in)
    feats = layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(feats)
    feats = layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(feats)
    feats = layers.Flatten()(feats)

    # Project to a single 192-wide token.
    token = layers.Dense(units=192)(feats)
    token = layers.Reshape(target_shape=(1, 192))(token)

    # NOTE(review): with a sequence of length 1 there is only one position to
    # embed — confirm the positional embedding is intended here.
    token = PositionalEmbedding(sequence_length=1, embed_dim=192)(token)

    for _block in range(3):
        token = transformer_encoder(token, embed_dim=192, num_heads=6, ff_dim=384)

    # Classification head.
    head = layers.LayerNormalization()(token)
    head = layers.Flatten()(head)
    head = layers.Dense(units=128, activation='relu')(head)
    head = layers.Dropout(rate=0.4)(head)
    class_probs = layers.Dense(units=num_classes, activation='softmax')(head)

    hybrid = models.Model(image_in, class_probs)
    return hybrid

# ---------- COMPILE ----------
# Targets are the one-hot y_*_cat arrays, hence categorical cross-entropy.
model_hybrid_v4 = build_hybrid_model_v4(input_shape, NUM_CLASSES)
model_hybrid_v4.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ---------- TRAIN ----------
history = model_hybrid_v4.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=20,
    batch_size=128,
)

# ---------- EVALUATE ----------
# evaluate() returns the loss followed by the single compiled metric.
test_loss, test_acc = model_hybrid_v4.evaluate(x=X_test, y=y_test_cat, verbose=0)
model_accuracies['Hybrid: 3-Conv + 3-Transformer + PosEnc'] = test_acc
print(f"\n Hybrid: 3-Conv + 3-Transformer + PosEnc: {test_acc:.4f}")

# ---------- SAVE MODEL ----------
model_hybrid_v4.save('quickdraw_hybrid_v4_strong.keras')
print("Model saved to 'quickdraw_hybrid_v4_strong.keras'")

Epoch 1/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m589s[0m 155ms/step - accuracy: 0.5810 - loss: 1.6382 - val_accuracy: 0.8219 - val_loss: 0.6582
Epoch 2/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m579s[0m 154ms/step - accuracy: 0.8239 - loss: 0.6771 - val_accuracy: 0.8505 - val_loss: 0.5603
Epoch 3/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m579s[0m 154ms/step - accuracy: 0.8609 - loss: 0.5311 - val_accuracy: 0.8539 - val_loss: 0.5481
Epoch 4/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m579s[0m 154ms/step - accuracy: 0.8851 - loss: 0.4312 - val_accuracy: 0.8535 - val_loss: 0.5541
Epoch 5/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m579s[0m 154ms/step - accuracy: 0.9052 - loss: 0.3509 - val_accuracy: 0.8562 - val_loss: 0.5853
Epoch 6/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m584s[0m 156ms/step - accuracy: 0.9212 - loss: 0.2843 - val_accuracy: 0.8528 - val_loss:

Save all accuracies into a CSV file

In [19]:
import pandas as pd

# One row per model label (the index) with a single 'Accuracy' column holding
# the test accuracy recorded by each training cell.
df = pd.DataFrame(
    data={'Accuracy': list(model_accuracies.values())},
    index=list(model_accuracies.keys()),
)
df.to_csv("model_accuracies_10000.csv")

print("Saved model_accuracies_10000.csv")


Saved model_accuracies_10000.csv
