In [None]:
!pip install tensorflow

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import tensorflow_datasets as tfds


In [None]:
# Side length (pixels) that preprocess_image resizes every image to.
IMG_SIZE = 64
# Number of examples per batch in the tf.data pipelines built in this cell.
BATCH_SIZE = 16

def preprocess_image(image, label):
    """Resize an image to IMG_SIZE x IMG_SIZE and divide its values by 255.

    Args:
        image: Image tensor accepted by ``tf.image.resize``.
        label: Passed through untouched.

    Returns:
        ``(image, label)`` where the image is float32, resized and scaled.
    """
    resized = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled, label

# NOTE(review): train_ds / val_ds / test_ds are not assigned anywhere above this
# cell; they come from the tfds.load cell further down, which must run first.
# Each name is overwritten with its own transformed version, so running this
# cell twice batches already-batched data.
train_ds = (
    train_ds
    .map(preprocess_image)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
val_ds = (
    val_ds
    .map(preprocess_image)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_ds = (
    test_ds
    .map(preprocess_image)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
class _TransformerBlock(layers.Layer):
    """Pre-norm Transformer encoder block used by VisionTransformer.

    Keras does not provide a ``layers.TransformerBlock``, so the block is
    defined here: LayerNorm -> multi-head self-attention -> residual add,
    followed by LayerNorm -> two-layer MLP -> residual add.

    Args:
        d_model: Width of the token embeddings (input and output size).
        num_heads: Number of attention heads.
        mlp_dim: Hidden width of the feed-forward MLP.
        dropout: Dropout rate used inside attention and after each sub-layer.
    """

    def __init__(self, d_model, num_heads, mlp_dim, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.attention_norm = layers.LayerNormalization(epsilon=1e-6)
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads,
            # Split the model width across heads; never let it drop to zero.
            key_dim=max(d_model // num_heads, 1),
            dropout=dropout,
        )
        self.attention_dropout = layers.Dropout(dropout)
        self.mlp_norm = layers.LayerNormalization(epsilon=1e-6)
        self.mlp_hidden = layers.Dense(mlp_dim, activation="gelu")
        self.mlp_output = layers.Dense(d_model)
        self.mlp_dropout = layers.Dropout(dropout)

    def call(self, x, training=None):
        """Apply self-attention and the MLP, each wrapped in a residual connection."""
        normed = self.attention_norm(x)
        attended = self.attention(normed, normed, training=training)
        x = x + self.attention_dropout(attended, training=training)

        hidden = self.mlp_hidden(self.mlp_norm(x))
        hidden = self.mlp_output(hidden)
        return x + self.mlp_dropout(hidden, training=training)


class VisionTransformer(models.Model):
    """Subclassed Vision Transformer classifier that outputs class logits.

    Images are cut into non-overlapping ``patch_size`` x ``patch_size`` patches,
    linearly projected to ``d_model``, given learned position embeddings, passed
    through ``num_layers`` Transformer blocks and finally average-pooled into an
    MLP classification head.

    Args:
        num_classes: Number of output logits.
        image_size: Side length of the (square) input images.
        patch_size: Side length of each square patch. When it does not divide
            ``image_size`` the trailing rows/columns are dropped ('VALID' padding).
        num_layers: Number of Transformer blocks.
        d_model: Token embedding width.
        num_heads: Attention heads per block.
        mlp_dim: Hidden width of the per-block MLP and of the head.
        dropout: Dropout rate used in the blocks and in the head.
    """

    def __init__(self, num_classes, image_size=IMG_SIZE, patch_size=6, num_layers=8,
                 d_model=64, num_heads=4, mlp_dim=128, dropout=0.1):
        super().__init__()
        # Floor division matches the number of patches 'VALID' extraction yields.
        self.num_patches = (image_size // patch_size) ** 2
        # Flattened patch length; the factor 3 hard-codes three input channels.
        self.patch_dim = 3 * patch_size * patch_size
        self.patch_size = patch_size

        self.flatten_patches = layers.Reshape((self.num_patches, self.patch_dim))
        self.projection = layers.Dense(d_model)
        self.position_embedding = layers.Embedding(input_dim=self.num_patches, output_dim=d_model)

        # The original referenced layers.TransformerBlock, which does not exist
        # in Keras and raised AttributeError as soon as the model was built.
        self.transformer_layers = [
            _TransformerBlock(d_model, num_heads, mlp_dim, dropout)
            for _ in range(num_layers)
        ]
        self.mlp_head = models.Sequential([
            layers.LayerNormalization(epsilon=1e-6),
            layers.GlobalAveragePooling1D(),
            layers.Dense(mlp_dim, activation='relu'),
            layers.Dropout(dropout),
            layers.Dense(num_classes)
        ])

    def call(self, inputs, training=None):
        """Map a batch of images to a batch of ``num_classes`` logits."""
        patches = tf.image.extract_patches(
            images=inputs,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding='VALID'
        )
        # Collapse the patch grid into one sequence axis.
        patches = self.flatten_patches(patches)
        x = self.projection(patches)
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        # The position table broadcasts over the batch dimension.
        x = x + self.position_embedding(positions)

        for transformer_layer in self.transformer_layers:
            x = transformer_layer(x, training=training)

        return self.mlp_head(x, training=training)

In [None]:
def mlp(x, hidden_units, dropout_rate):
    """Stack one GELU Dense layer plus Dropout on ``x`` per entry of ``hidden_units``.

    Args:
        x: Input tensor.
        hidden_units: Iterable of Dense layer widths, applied in order.
        dropout_rate: Rate for the Dropout layer following each Dense layer.

    Returns:
        The output of the last Dropout layer, or ``x`` unchanged when
        ``hidden_units`` is empty.
    """
    for width in hidden_units:
        dense = layers.Dense(width, activation=keras.activations.gelu)
        x = layers.Dropout(dropout_rate)(dense(x))
    return x

In [None]:
import keras
from keras import layers
from keras import ops

In [None]:
class Patches(layers.Layer):
    """Split a batch of images into flattened, non-overlapping square patches.

    Args:
        patch_size: Side length of each patch in pixels.
        **kwargs: Standard ``keras.layers.Layer`` arguments (``name``, ``dtype``, ...).
            Accepting them is what lets ``from_config`` rebuild the layer, because
            ``get_config`` includes those base keys.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return patches reshaped to (batch, patches_h * patches_w, patch_size**2 * channels)."""
        input_shape = ops.shape(images)
        batch_size = input_shape[0]
        height = input_shape[1]
        width = input_shape[2]
        channels = input_shape[3]
        # Floor division: pixels that do not fill a whole patch are not counted.
        num_patches_h = height // self.patch_size
        num_patches_w = width // self.patch_size
        patches = keras.ops.image.extract_patches(images, size=self.patch_size)
        # Merge the 2-D patch grid into a single sequence axis.
        patches = ops.reshape(
            patches,
            (
                batch_size,
                num_patches_h * num_patches_w,
                self.patch_size * self.patch_size * channels,
            ),
        )
        return patches

    def get_config(self):
        """Return the base layer config extended with ``patch_size``."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config


In [None]:
# Geometry and label-count settings shared by the patch demo and the ViT builder.
image_size = 64  # side length of the square input images
num_classes = 10  # number of output logits
input_shape = (image_size, image_size, 3)  # height, width, channels
patch_size = 6  # side length of each square patch

In [None]:
# numpy is otherwise first imported in a later cell, so on a fresh kernel the
# np.random call below raised NameError; import it where it is needed.
import numpy as np

# Take the first batch from the training pipeline and keep only the images.
sample_batch = next(iter(train_ds))
sample_images, _ = sample_batch

sample_images = sample_images.numpy()

# Pick one image of the batch at random for the patch visualisation.
random_index = np.random.randint(0, sample_images.shape[0])
image = sample_images[random_index]

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Show the sampled image; it is multiplied by 255 before the uint8 cast.
plt.figure(figsize=(4, 4))
plt.imshow((image * 255).astype("uint8"))
plt.axis("off")

# Wrap the image in a batch of one, resize it and split it into patches.
image_batch = ops.convert_to_tensor([image])
resized_image = ops.image.resize(image_batch, size=(image_size, image_size))
patches = Patches(patch_size)(resized_image)
print(f"Image size: {image_size} X {image_size}")
print(f"Patch size: {patch_size} X {patch_size}")
print(f"Patches per image: {patches.shape[1]}")
print(f"Elements per patch: {patches.shape[-1]}")

# Draw the patches on an n x n grid, in the order the layer returned them.
n = int(np.sqrt(patches.shape[1]))
plt.figure(figsize=(4, 4))
for i, patch in enumerate(patches[0]):
    ax = plt.subplot(n, n, i + 1)
    patch_img = ops.reshape(patch, (patch_size, patch_size, 3))
    patch_pixels = ops.convert_to_numpy(patch_img * 255)
    plt.imshow(patch_pixels.astype("uint8"))
    plt.axis("off")

In [None]:
class PatchEncoder(layers.Layer):
    """Project flattened patches to ``projection_dim`` and add learned position embeddings.

    Args:
        num_patches: Number of patches per image (size of the position table).
        projection_dim: Output width of the linear projection and the embeddings.
        **kwargs: Standard ``keras.layers.Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        # Kept so get_config() can report every required constructor argument.
        self.projection_dim = projection_dim
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        """Return the projected patches plus their position embeddings."""
        # Shape (1, num_patches): broadcasts across the batch when added below.
        positions = ops.expand_dims(
            ops.arange(start=0, stop=self.num_patches, step=1), axis=0
        )
        projected_patches = self.projection(patch)
        encoded = projected_patches + self.position_embedding(positions)
        return encoded

    def get_config(self):
        """Return the base layer config plus both constructor arguments.

        ``projection_dim`` was previously missing, so ``from_config`` could not
        rebuild the layer.
        """
        config = super().get_config()
        config.update(
            {
                "num_patches": self.num_patches,
                "projection_dim": self.projection_dim,
            }
        )
        return config

In [None]:
def create_vit_classifier():
    """Build the functional-API ViT classifier from the notebook-level hyperparameters.

    Reads these globals at call time: ``input_shape``, ``data_augmentation``,
    ``patch_size``, ``num_patches``, ``projection_dim``, ``transformer_layers``,
    ``num_heads``, ``transformer_units``, ``mlp_head_units`` and ``num_classes``.

    Returns:
        An uncompiled ``keras.Model`` mapping images to ``num_classes`` logits
        (the final Dense layer has no activation).
    """
    inputs = keras.Input(shape=input_shape)

    # Augmentation -> patch extraction -> patch embedding with positions.
    augmented = data_augmentation(inputs)
    patch_sequence = Patches(patch_size)(augmented)
    tokens = PatchEncoder(num_patches, projection_dim)(patch_sequence)

    # Stack of pre-norm Transformer blocks.
    for _ in range(transformer_layers):
        # Self-attention sub-layer with its residual connection.
        normed = layers.LayerNormalization(epsilon=1e-6)(tokens)
        attended = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(normed, normed)
        after_attention = layers.Add()([attended, tokens])

        # Feed-forward sub-layer with its residual connection.
        feed_forward = layers.LayerNormalization(epsilon=1e-6)(after_attention)
        feed_forward = mlp(feed_forward, hidden_units=transformer_units, dropout_rate=0.1)
        tokens = layers.Add()([feed_forward, after_attention])

    # Classification head: normalise, flatten all tokens into one vector, regularise.
    head = layers.LayerNormalization(epsilon=1e-6)(tokens)
    head = layers.Flatten()(head)
    head = layers.Dropout(0.5)(head)
    head = mlp(head, hidden_units=mlp_head_units, dropout_rate=0.5)
    logits = layers.Dense(num_classes)(head)

    return keras.Model(inputs=inputs, outputs=logits)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom

# Random augmentation pipeline: horizontal flip, then rotation, then zoom.
data_augmentation = tf.keras.Sequential(
    layers=[
        RandomFlip(mode="horizontal"),
        RandomRotation(factor=0.2),
        RandomZoom(height_factor=0.2),
    ]
)

In [None]:
# Random augmentation is applied to the training split only. Validation and
# test images go through the same layers with training=False, which disables
# the random transforms, so evaluation inputs are not randomly distorted.
train_dataset = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y))
val_dataset = val_ds.map(lambda x, y: (data_augmentation(x, training=False), y))
test_dataset = test_ds.map(lambda x, y: (data_augmentation(x, training=False), y))

In [None]:
# Load EuroSAT (RGB) as (image, label) pairs together with its metadata.
dataset1, info1 = tfds.load(
    name="eurosat/rgb",
    with_info=True,
    as_supervised=True,
)
full_ds = dataset1["train"]

# Split sizes: 80% train, 10% validation, the remainder for test.
num_samples = info1.splits["train"].num_examples
train_size = int(0.8 * num_samples)
val_size = int(0.1 * num_samples)
test_size = num_samples - train_size - val_size

# Carve the three consecutive, non-overlapping splits out of the single
# "train" split; the test split is everything left after train and validation.
train_ds = full_ds.take(train_size)
held_out = full_ds.skip(train_size)
val_ds = held_out.take(val_size)
test_ds = held_out.skip(val_size)

# Function to convert dataset to NumPy arrays
def dataset_to_numpy(dataset, image_size=(64, 64)):
    """Materialise an unbatched ``(image, label)`` dataset as two NumPy arrays.

    Args:
        dataset: Iterable yielding ``(image, label)`` tensor pairs, one example
            at a time.
        image_size: ``(height, width)`` every image is resized to.

    Returns:
        ``(images, labels)``: ``images`` is a float32 array of shape
        ``(N, height, width, channels)``; ``labels`` is an array of length ``N``.
        Integer-typed input images are scaled to [0, 1].

    Raises:
        ValueError: From ``np.stack`` when ``dataset`` yields no examples.
    """
    images, labels = [], []
    for img, label in dataset:
        # Convert BEFORE resizing: tf.image.resize already returns float32, and
        # convert_image_dtype does not rescale float input, so the previous
        # order left pixel values in [0, 255].
        img = tf.image.convert_image_dtype(img, tf.float32)
        img = tf.image.resize(img, image_size)
        images.append(img.numpy())
        labels.append(label.numpy())

    return np.stack(images), np.array(labels)

# Convert each split into in-memory NumPy arrays; dataset_to_numpy iterates
# over every example of the split it is given.
x_train, y_train = dataset_to_numpy(train_ds)
x_val, y_val = dataset_to_numpy(val_ds)
x_test, y_test = dataset_to_numpy(test_ds)

# Print the array shapes of every split as a quick sanity check.
print(f"x_train: {x_train.shape}, y_train: {y_train.shape}")
print(f"x_val: {x_val.shape}, y_val: {y_val.shape}")
print(f"x_test: {x_test.shape}, y_test: {y_test.shape}")

In [None]:
# Optimizer settings passed to AdamW in run_experiment.
learning_rate = 1e-3
weight_decay = 1e-4
# Patches per image: a square grid of (image_size // patch_size) patches per side.
num_patches = (image_size // patch_size) ** 2

In [None]:
# Transformer hyperparameters read by create_vit_classifier.
projection_dim = 128  # width of each patch embedding
transformer_layers = 10  # number of Transformer blocks
num_heads = 6  # attention heads per block
# Dense widths of the MLP inside each block; the last one equals
# projection_dim so the residual addition lines up.
transformer_units = [projection_dim * 2, projection_dim]
# Dense widths of the classification head.
mlp_head_units = [512, 256]

In [None]:
def run_experiment(
    model,
    epochs=20,
    batch_size=16,
    checkpoint_filepath="/tmp/checkpoint.weights.h5",
):
    """Compile, train and evaluate ``model`` on the notebook's NumPy splits.

    Trains on the globals ``x_train`` / ``y_train``, validates on the held-out
    ``x_val`` / ``y_val`` split (previously built but unused, while validation
    data was carved out of the training set), checkpoints the weights with the
    best validation accuracy, reloads them and reports test metrics on
    ``x_test`` / ``y_test``.

    Args:
        model: Uncompiled Keras model producing logits.
        epochs: Number of training epochs.
        batch_size: Training batch size.
        checkpoint_filepath: Where the best weights are written and reloaded from.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    optimizer = keras.optimizers.AdamW(
        learning_rate=learning_rate, weight_decay=weight_decay
    )

    model.compile(
        optimizer=optimizer,
        # The model emits raw logits, hence from_logits=True.
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
            keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
        ],
    )

    # Keep only the weights of the epoch with the best validation accuracy.
    checkpoint_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_filepath,
        monitor="val_accuracy",
        save_best_only=True,
        save_weights_only=True,
    )

    history = model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_val, y_val),
        callbacks=[checkpoint_callback],
    )

    # Evaluate the best checkpoint rather than the final-epoch weights.
    model.load_weights(checkpoint_filepath)
    _, accuracy, top_5_accuracy = model.evaluate(x_test, y_test)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")
    print(f"Test top 5 accuracy: {round(top_5_accuracy * 100, 2)}%")

    return history


# Build the ViT, then compile, train and evaluate it. `history` is kept at
# notebook level because plot_history reads it as a global.
vit_classifier = create_vit_classifier()
history = run_experiment(vit_classifier)


def plot_history(item):
    """Plot the training and validation curves of one metric over the epochs.

    Reads the notebook-level ``history`` object and expects both ``item`` and
    ``"val_" + item`` to be keys of ``history.history``.

    Args:
        item: Metric name, e.g. ``"loss"`` or ``"accuracy"``.
    """
    for key in (item, "val_" + item):
        plt.plot(history.history[key], label=key)
    plt.xlabel("Epochs")
    plt.ylabel(item)
    plt.title("Train and Validation {} Over Epochs".format(item), fontsize=14)
    plt.legend()
    plt.grid()
    plt.show()


# One figure per tracked metric, in the same order as before.
for metric_name in ("loss", "top-5-accuracy", "accuracy"):
    plot_history(metric_name)

In [None]:
# Save the trained model to an .h5 file; despite the file name, this calls
# model.save() rather than save_weights().
# NOTE(review): the path assumes Google Drive is mounted at /content/drive; no
# mount call is visible in this notebook, so confirm before running.
vit_classifier.save("/content/drive/My Drive/VITWEIGHTS.h5")