# IMPORTS

In [None]:
import tensorflow as tf # For tensorflow
import numpy as np # For mathematical computations
import matplotlib.pyplot as plt # For plotting and Visualization
import seaborn as sns
from tensorflow.keras.layers import Input, Layer, Resizing, Rescaling, InputLayer, Conv2D, BatchNormalization, MaxPooling2D, Dropout, Flatten, Dense, RandomRotation, RandomFlip, RandomContrast, ReLU, Add, GlobalAveragePooling2D, Permute
from tensorflow.keras import Model
from tensorflow.keras.regularizers import L2
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy, TopKCategoricalAccuracy
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix
from tensorflow.keras.callbacks import ModelCheckpoint, Callback
import cv2


# wandb INSTALLATION

In [None]:
!pip install wandb

In [None]:
import wandb
from wandb.keras import WandbCallback

In [None]:
!wandb login

In [None]:
wandb.init(project="Human-Emotion-Detection", entity="ishu9t2")

In [None]:
# Hyperparameters for this experiment. They are written into the active run's
# config through `update()`; rebinding `wandb.config` to a plain dict only
# replaces the module attribute and leaves the run's own config untouched.
# `allow_val_change=True` keeps this cell re-runnable after a value is edited.
wandb.config.update(
    {
        "BATCH_SIZE": 32,
        "IM_SIZE": 224,
        "LEARNING_RATE": 5e-5,
        "N_EPOCHS": 20,
        "DROPOUT_RATE": 0.0,
        "REGULARIZATION_RATE": 0.0,
        "N_FILTERS": 6,
        "KERNEL_SIZE": 3,
        "N_STRIDES": 1,
        "POOL_SIZE": 2,
        "N_DENSE_1": 1024,
        "N_DENSE_2": 128,
        "NUM_CLASSES": 3,
        "PATCH_SIZE": 16,
        "PROJ_DIM": 768,
        "CLASS_NAMES": ["angry", "happy", "sad"],
    },
    allow_val_change=True,
)
# Alias used for `CONFIGURATION["KEY"]` lookups.
CONFIGURATION = wandb.config

# KAGGLE

In [None]:
!pip install -q kaggle #installing kaggle

In [None]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [None]:
!chmod 600 /root/.kaggle/kaggle.json #changing permission to read and execute

# DATA LOADING

In [None]:
!kaggle datasets download -d muhammadhananasghar/human-emotions-datasethes #downloading dataset from kaggle (command from kaggle website)

In [None]:
!ls

In [None]:
!unzip "/content/human-emotions-datasethes.zip" -d "/content/dataset/" # Unzipping dataset

# CONFIGURATION / CONSTANTS

In [None]:
# Common prefix of the split folders produced by the unzip step.
_DATASET_ROOT = "/content/dataset/Emotions Dataset/Emotions Dataset"

TRAIN_DIR = _DATASET_ROOT + "/train"
TEST_DIR = _DATASET_ROOT + "/test"
# Validation reads from the same folder as the test split.
VAL_DIR = _DATASET_ROOT + "/test"

# Plain-dict hyperparameters; this rebinds CONFIGURATION, which was previously
# an alias of `wandb.config`.
CONFIGURATION = dict(
    IM_SIZE=224,
    BATCH_SIZE=32,
    LEARNING_RATE=0.001,
    N_EPOCHS=20,
    DROPOUT_RATE=0.0,
    REGULARIZATION_RATE=0.0,
    N_FILTERS=6,
    KERNEL_SIZE=3,
    N_STRIDES=1,
    POOL_SIZE=2,
    N_DENSE_1=1024,
    N_DENSE_2=128,
    N_DENSE_3=3,
    NUM_CLASSES=3,
    CLASS_NAMES=["angry", "happy", "sad"],
)



# DATASET CREATION

In [None]:
# Training split: class labels come from the sub-folder names and are returned
# one-hot encoded; images are resized to IM_SIZE x IM_SIZE and batched.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    TRAIN_DIR,
    image_size=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]),
    batch_size=CONFIGURATION["BATCH_SIZE"],
    label_mode='categorical',
    labels='inferred',
    seed=123,  # fixed seed for the shuffle
    shuffle=True,
)

In [None]:
# Validation split, loaded exactly like the training split. The batch size is
# read from CONFIGURATION so both splits follow the same setting.
val_dataset = tf.keras.utils.image_dataset_from_directory(
    VAL_DIR,
    labels='inferred',  # Class labels come from the sub-folder names
    label_mode='categorical',  # One-hot encoded labels
    batch_size=CONFIGURATION["BATCH_SIZE"],  # Number of samples per batch
    image_size=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]),  # Target image size
    shuffle=True,  # Shuffle the dataset
    seed=123,  # Random seed for shuffling
)

In [None]:
for i in val_dataset.take(1):
  print(i)

# DATA VISUALIZATION

In [None]:
# Show the first 16 images of one training batch on a 4x4 grid, each titled
# with the class name selected by the arg-max of its one-hot label.
plt.figure(figsize = (14, 14))
for images, labels in train_dataset.take(1):
  for position in range(1, 17):
    sample = position - 1
    plt.subplot(4, 4, position)
    plt.imshow(images[sample] / 255)
    label_index = tf.argmax(labels[sample]).numpy()
    plt.title(train_dataset.class_names[label_index])
    plt.axis("off")

In [None]:
print(val_dataset.class_names)

# DATA AUGMENTATION

## BASIC AUGMENTATION

In [None]:
# Random augmentation pipeline: small rotation, horizontal flip, contrast jitter.
augment_layers = tf.keras.Sequential()
augment_layers.add(RandomRotation(factor=(-0.025, 0.025)))
augment_layers.add(RandomFlip(mode="horizontal"))
augment_layers.add(RandomContrast(factor=0.1))

def augment_layer(image, label):
  """Apply `augment_layers` in training mode to `image`; `label` passes through unchanged."""
  augmented = augment_layers(image, training=True)
  return augmented, label

## CUT MIX AUGMENTATION

In [None]:
def sample_beta_distribution(size, concentration_0=0.2, concentration_1=0.2):
    """Draw Beta-distributed samples as a ratio of two Gamma draws.

    The first Gamma draw uses `concentration_1`, the second `concentration_0`;
    the result is first / (first + second). Each draw is requested with
    `shape=[size]`.
    """
    numerator = tf.random.gamma(shape=[size], alpha=concentration_1)
    other = tf.random.gamma(shape=[size], alpha=concentration_0)
    total = numerator + other
    return numerator / total

IMG_SIZE = CONFIGURATION["IM_SIZE"]
@tf.function
def get_box(lambda_value):
    """Pick a random CutMix box inside an IMG_SIZE x IMG_SIZE image.

    The box side is IMG_SIZE * sqrt(1 - lambda_value) (truncated to int32), its
    centre is drawn uniformly, and its corners are clipped to the image.

    Returns:
        (x offset, y offset, height, width) of the box; height and width are
        bumped to 1 when clipping would leave them at 0.
    """
    cut_ratio = tf.math.sqrt(1.0 - lambda_value)

    # Width and height of the box before clipping.
    cut_w = tf.cast(IMG_SIZE * cut_ratio, tf.int32)
    cut_h = tf.cast(IMG_SIZE * cut_ratio, tf.int32)

    # Random box centre (x is drawn first, then y).
    centre_x = tf.random.uniform((1,), minval=0, maxval=IMG_SIZE, dtype=tf.int32)[0]
    centre_y = tf.random.uniform((1,), minval=0, maxval=IMG_SIZE, dtype=tf.int32)[0]

    half_w = cut_w // 2
    half_h = cut_h // 2

    left = tf.clip_by_value(centre_x - half_w, 0, IMG_SIZE)
    top = tf.clip_by_value(centre_y - half_h, 0, IMG_SIZE)
    right = tf.clip_by_value(centre_x + half_w, 0, IMG_SIZE)
    bottom = tf.clip_by_value(centre_y + half_h, 0, IMG_SIZE)

    # Never return an empty box.
    box_h = bottom - top
    if box_h == 0:
        box_h = box_h + 1

    box_w = right - left
    if box_w == 0:
        box_w = box_w + 1

    return left, top, box_h, box_w


@tf.function
def cutmix(train_ds_one, train_ds_two):
    """Blend two (image, label) elements with CutMix.

    A box obtained from `get_box` is cut out of the first image and filled with
    the same region of the second image. The labels are mixed with a weight
    equal to the fraction of the image area left outside the box.

    Returns:
        (mixed image, mixed label).
    """
    image1, label1 = train_ds_one
    image2, label2 = train_ds_two

    # Initial mixing ratio: one Beta sample with both concentrations at 0.25.
    sampled = sample_beta_distribution(1, [0.25], [0.25])
    initial_lambda = sampled[0][0]

    boundaryx1, boundaryy1, target_h, target_w = get_box(initial_lambda)

    def isolate_patch(img):
        # Keep only the box region of `img`; everything else becomes zero padding.
        cropped = tf.image.crop_to_bounding_box(
            img, boundaryy1, boundaryx1, target_h, target_w
        )
        return tf.image.pad_to_bounding_box(
            cropped, boundaryy1, boundaryx1, IMG_SIZE, IMG_SIZE
        )

    patch_from_2 = isolate_patch(image2)
    patch_from_1 = isolate_patch(image1)

    # Blank the box in the first image, then drop in the second image's patch.
    image = (image1 - patch_from_1) + patch_from_2

    # Recompute the ratio from the actual (clipped) box area.
    kept_fraction = 1 - (target_w * target_h) / (IMG_SIZE * IMG_SIZE)
    kept_fraction = tf.cast(kept_fraction, tf.float32)

    label = kept_fraction * label1 + (1 - kept_fraction) * label2
    return image, label

# DATASET PREPARATION

TRAIN DATASET

In [None]:
# Without Augmentation: the raw training batches, prefetched.
training_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

In [None]:
# Basic Augmentation: each batch goes through `augment_layer`, then is prefetched.
augmented_batches = train_dataset.map(augment_layer, num_parallel_calls=tf.data.AUTOTUNE)
training_dataset = augmented_batches.prefetch(tf.data.AUTOTUNE)

In [None]:
# Cutmix Augmentation + Basic

# Two augmented views of the training stream, paired element by element.
# NOTE(review): both views derive from the same seeded `train_dataset`; confirm
# that they do not yield batches in the same order.
train_dataset_1, train_dataset_2 = (
    train_dataset.map(augment_layer, num_parallel_calls=tf.data.AUTOTUNE)
    for _ in range(2)
)
mixed_dataset = tf.data.Dataset.zip((train_dataset_1, train_dataset_2))

# Each pair is blended by `cutmix`, then prefetched.
cutmix_batches = mixed_dataset.map(cutmix, num_parallel_calls=tf.data.AUTOTUNE)
training_dataset = cutmix_batches.prefetch(tf.data.AUTOTUNE)

VALIDATION DATASET

In [None]:
# Validation batches are used as loaded (no augmentation), only prefetched.
validation_dataset = val_dataset.prefetch(tf.data.AUTOTUNE)

# MODELLING

## LENET MODEL

In [None]:
# LeNet-style CNN: two conv/batch-norm/pool stages followed by three dense layers.
lenet_model = tf.keras.Sequential([
    # Accept images of any height/width with 3 channels.
    InputLayer(input_shape=(None, None, 3)),

    # Bring every image to IM_SIZE x IM_SIZE and scale pixel values by 1/255.
    Resizing(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]),
    Rescaling(1./255),

    # --- Convolution stage 1 ---
    Conv2D(
        filters=CONFIGURATION["N_FILTERS"],
        kernel_size=CONFIGURATION["KERNEL_SIZE"],
        strides=CONFIGURATION["N_STRIDES"],
        padding="valid",
        activation="relu",
        kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"]),
    ),
    # Normalise activations per mini-batch.
    BatchNormalization(),
    # Downsample; the pooling stride is twice the convolution stride.
    MaxPooling2D(
        pool_size=CONFIGURATION["POOL_SIZE"],
        strides=CONFIGURATION["N_STRIDES"]*2,
    ),
    # Dropout regularisation at the configured rate.
    Dropout(CONFIGURATION["DROPOUT_RATE"]),

    # --- Convolution stage 2 (N_FILTERS*2+4 filters) ---
    Conv2D(
        filters=CONFIGURATION["N_FILTERS"]*2+4,
        kernel_size=CONFIGURATION["KERNEL_SIZE"],
        strides=CONFIGURATION["N_STRIDES"],
        padding="valid",
        activation="relu",
        kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"]),
    ),
    BatchNormalization(),
    MaxPooling2D(
        pool_size=CONFIGURATION["POOL_SIZE"],
        strides=CONFIGURATION["N_STRIDES"]*2,
    ),

    # Collapse the feature maps into one vector per image.
    Flatten(),

    # --- Classification head ---
    Dense(
        CONFIGURATION["N_DENSE_1"],
        activation="relu",
        kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"]),
    ),
    BatchNormalization(),
    Dropout(CONFIGURATION["DROPOUT_RATE"]),

    Dense(
        CONFIGURATION["N_DENSE_2"],
        activation="relu",
        kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"]),
    ),
    BatchNormalization(),

    # Output layer: N_DENSE_3 units with softmax.
    Dense(CONFIGURATION["N_DENSE_3"], activation="softmax"),
])
lenet_model.summary()

In [None]:
tf.keras.utils.plot_model(lenet_model, to_file="model.png", show_shapes=True, show_layer_names=True)

## RESNET34 (Residual Network with 34 layers) MODEL

In [None]:
class CustomConv2D(Layer):
  """Conv2D ("same" padding, ReLU activation) followed by batch normalisation."""

  def __init__(self, filters, kernel_size, strides):
    super(CustomConv2D, self).__init__()
    self.conv = Conv2D(
        filters,
        kernel_size=kernel_size,
        strides=strides,
        padding="same",
        activation="relu",
    )
    self.bn = BatchNormalization()

  def call(self, inputs):
    # Convolution first, then normalisation of its activated output.
    return self.bn(self.conv(inputs))


class ResidualBlock(Layer):
  """Two 3x3 CustomConv2D layers with a skip connection and a final ReLU.

  Args:
    filters: number of filters for every convolution in the block.
    strides: stride of the first convolution. When it is not 1, the skip path
      goes through a 1x1 CustomConv2D with the same stride so that both
      branches have matching shapes before they are added.
  """

  def __init__(self, filters, strides=1):
    super(ResidualBlock, self).__init__(name="res_block")
    self.conv1 = CustomConv2D(filters, 3, strides)
    self.conv2 = CustomConv2D(filters, 3, 1)

    # `dotted` marks blocks whose skip path needs a projection.
    self.dotted = (strides!=1)
    if self.dotted:
      # Follow the main branch's stride instead of assuming it is 2.
      self.dimensionMatching = CustomConv2D(filters, 1, strides)

    # Merge and activation layers are built once, not on every forward pass.
    self.add_shortcut = Add()
    self.out_relu = ReLU()

  def call(self, inputs):
    x = self.conv1(inputs)
    x = self.conv2(x)

    if self.dotted:
      inputs = self.dimensionMatching(inputs)

    x = self.add_shortcut([x, inputs])
    x = self.out_relu(x)

    return x

class ResNet34(Model):
  """ResNet-34 style classifier built from `CustomConv2D` and `ResidualBlock`.

  Inputs are resized to IM_SIZE x IM_SIZE and scaled by 1/255, passed through
  a 7x7 stride-2 convolution and a max-pool, then 16 residual blocks
  (3 x 64, 4 x 128, 6 x 256, 3 x 512 filters; the first block of each wider
  group uses stride 2), global average pooling and a softmax layer with
  NUM_CLASSES units.
  """

  def __init__(self):
    super(ResNet34, self).__init__(name = "resnet34")
    # Preprocessing layers are created once here rather than inside `call`.
    self.resize = Resizing(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"])
    self.rescale = Rescaling(1./255)

    self.conv = CustomConv2D(64, 7, 2)
    self.max_pool = MaxPooling2D(pool_size=3, strides=2, padding="same")

    self.block1 = ResidualBlock(64)
    self.block2 = ResidualBlock(64)
    self.block3 = ResidualBlock(64)

    self.block4 = ResidualBlock(128, 2)
    self.block5 = ResidualBlock(128)
    self.block6 = ResidualBlock(128)
    self.block7 = ResidualBlock(128)

    self.block8 = ResidualBlock(256, 2)
    self.block9 = ResidualBlock(256)
    self.block10 = ResidualBlock(256)
    self.block11 = ResidualBlock(256)
    self.block12 = ResidualBlock(256)
    self.block13 = ResidualBlock(256)

    self.block14 = ResidualBlock(512, 2)
    self.block15 = ResidualBlock(512)
    self.block16 = ResidualBlock(512)

    self.avgpool = GlobalAveragePooling2D()

    self.fc = Dense(CONFIGURATION["NUM_CLASSES"], activation="softmax")

  def call(self, inputs):
    x = self.resize(inputs)
    x = self.rescale(x)

    x = self.conv(x)
    x = self.max_pool(x)

    # Residual stages, applied in definition order.
    residual_blocks = (
        self.block1, self.block2, self.block3,
        self.block4, self.block5, self.block6, self.block7,
        self.block8, self.block9, self.block10,
        self.block11, self.block12, self.block13,
        self.block14, self.block15, self.block16,
    )
    for block in residual_blocks:
      x = block(x)

    x = self.avgpool(x)

    x = self.fc(x)
    return x



In [None]:
resnet_34 = ResNet34()
resnet_34(tf.zeros([1,224,224,3]))
resnet_34.summary()

In [None]:
tf.keras.utils.plot_model(resnet_34, to_file="model.png", show_shapes=True, show_layer_names=True)

## EFFICIENTNET MODEL

### TRANSFER LEARNING

In [None]:
# ImageNet-pretrained EfficientNetB4 without its classification top.
feature_extractor = tf.keras.applications.efficientnet.EfficientNetB4(
    input_shape=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"], 3),
    weights='imagenet',
    include_top=False,
)
# Keep the backbone frozen for transfer learning; switch to True to fine-tune it.
feature_extractor.trainable = False

In [None]:
# Transfer-learning classifier: frozen EfficientNetB4 backbone + small dense head.
# No Rescaling layer here: Keras EfficientNet models include their own input
# preprocessing and expect pixel values in the [0, 255] range.
efficientnet_input = Input(shape=(None, None, 3))
x = Resizing(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"])(efficientnet_input)
# training=False runs the backbone in inference mode.
x = feature_extractor(x, training=False)
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dense(128, activation='relu')(x)
output = Dense(CONFIGURATION["NUM_CLASSES"], activation='softmax')(x)

efficient_net_model = Model(efficientnet_input, output)

### FINE TUNING

In [None]:
# Separate ImageNet-pretrained EfficientNetB4 backbone (no top) for fine-tuning.
feature_extractor_finetune = tf.keras.applications.efficientnet.EfficientNetB4(
    input_shape=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"], 3),
    weights='imagenet',
    include_top=False,
)
# Frozen first (transfer learning); set to True afterwards to fine-tune.
feature_extractor_finetune.trainable = False

In [None]:
# Fine-tuning classifier: the dedicated `feature_extractor_finetune` backbone
# with a regularised dense head.
# No Rescaling layer here: Keras EfficientNet models include their own input
# preprocessing and expect pixel values in the [0, 255] range.
finetune_input = Input(shape=(None, None, 3))

x = Resizing(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"])(finetune_input)

# training=False runs the backbone in inference mode.
x = feature_extractor_finetune(x, training=False)

x = GlobalAveragePooling2D()(x)

x = Dense(CONFIGURATION["N_DENSE_1"], activation="relu", kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"]))(x)
x = BatchNormalization()(x)
x = Dropout(CONFIGURATION["DROPOUT_RATE"])(x)

x = Dense(CONFIGURATION["N_DENSE_2"], activation="relu", kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"]))(x)
x = BatchNormalization()(x)

output = Dense(CONFIGURATION["N_DENSE_3"], activation="softmax")(x)

finetuned_model = Model(finetune_input, output)

# TRAINING

## CALLBACKS

In [None]:
# Saves to "best_weights" only when validation accuracy improves (higher is better).
checkpoint_callback = ModelCheckpoint(
    "best_weights",
    save_best_only=True,
    monitor="val_accuracy",
    mode="max",
    verbose=1,
)

## LOSS FUNCTION & METRICS

In [None]:
# from_logits=False: the model output is already a probability distribution
# (it sums to 1). Use True when the model emits raw, un-normalised scores.
loss_function = CategoricalCrossentropy(from_logits=False)

# "accuracy": the highest-scoring class must match the one-hot target,
#   e.g. [0, 0, 1] matches [0, 0.1, 0.9].
# "top_k_accuracy": the target must be among the 2 highest-scoring classes.
metrics = [
    CategoricalAccuracy(name="accuracy"),
    TopKCategoricalAccuracy(k=2, name="top_k_accuracy"),
]

In [None]:
# Sparse Categorical
# When label mode for dataset is "int" that 0[1,0,0], 1[0,1,0], 2[0,0,1] type
# sparse_loss_functin = SparseCategoricalCrossentropy()

## LENET

In [None]:
# Adam at the configured learning rate, with the shared loss and metrics.
lenet_optimizer = Adam(learning_rate=CONFIGURATION["LEARNING_RATE"])
lenet_model.compile(loss=loss_function, metrics=metrics, optimizer=lenet_optimizer)

In [None]:
# Train LeNet for N_EPOCHS, validating after every epoch.
history = lenet_model.fit(
    training_dataset,
    epochs=CONFIGURATION["N_EPOCHS"],
    validation_data=validation_dataset,
    verbose=1,
)

## RESNET_34

In [None]:
# Adam at the configured learning rate, with the shared loss and metrics.
resnet_optimizer = Adam(learning_rate=CONFIGURATION["LEARNING_RATE"])
resnet_34.compile(loss=loss_function, metrics=metrics, optimizer=resnet_optimizer)

In [None]:
# Train ResNet34 for N_EPOCHS, validating after every epoch.
history_resnet = resnet_34.fit(
    training_dataset,
    epochs=CONFIGURATION["N_EPOCHS"],
    validation_data=validation_dataset,
    verbose=1,
)

## EFFICIENT NET

In [None]:
# Adam at the configured learning rate, with the shared loss and metrics.
efficient_net_optimizer = Adam(learning_rate=CONFIGURATION["LEARNING_RATE"])
efficient_net_model.compile(loss=loss_function, metrics=metrics, optimizer=efficient_net_optimizer)

In [None]:
# Short 3-epoch run for the transfer-learning model.
history_efficient_net = efficient_net_model.fit(
    training_dataset,
    epochs=3,
    validation_data=validation_dataset,
    verbose=1,
)

## FINETUNE

In [None]:
# Adam at the configured learning rate, with the shared loss and metrics.
finetune_optimizer = Adam(learning_rate=CONFIGURATION["LEARNING_RATE"])
finetuned_model.compile(loss=loss_function, metrics=metrics, optimizer=finetune_optimizer)

In [None]:
# 10-epoch run for the fine-tuning model.
history_finetune = finetuned_model.fit(
    training_dataset,
    epochs=10,
    validation_data=validation_dataset,
    verbose=1,
)

# EVALUATION

In [None]:
lenet_model.evaluate(validation_dataset)

In [None]:
resnet_34.evaluate(validation_dataset)

In [None]:
efficient_net_model.evaluate(validation_dataset)

In [None]:
finetuned_model.evaluate(validation_dataset)

# VISUALIZING RESULTS OF TRAINING

## LOSS

### LENET MODEL

In [None]:
# LeNet: training vs. validation loss per epoch.
for curve in ("loss", "val_loss"):
  plt.plot(history.history[curve])
plt.title("LOSS V/S EPOCH")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Train", "Val"])

### RESNET

In [None]:
# ResNet34: training vs. validation loss per epoch.
for curve in ("loss", "val_loss"):
  plt.plot(history_resnet.history[curve])
plt.title("LOSS V/S EPOCH")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Train", "Val"])

### EFFICIENT NET

In [None]:
# EfficientNet (transfer learning): training vs. validation loss per epoch.
for curve in ("loss", "val_loss"):
  plt.plot(history_efficient_net.history[curve])
plt.title("LOSS V/S EPOCH")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Train", "Val"])

### FINETUNED

In [None]:
# Fine-tuned model: training vs. validation loss per epoch.
# Reads `history_finetune` (returned by `finetuned_model.fit`), not the LeNet `history`.
plt.plot(history_finetune.history["loss"])
plt.plot(history_finetune.history["val_loss"])
plt.title("LOSS V/S EPOCH")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Train", "Val"])

## ACCURACY

### LENET MODEL

In [None]:
# LeNet: training vs. validation accuracy per epoch.
for curve in ("accuracy", "val_accuracy"):
  plt.plot(history.history[curve])
plt.title("ACCURACY V/S EPOCH")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(["Train", "Val"])

In [None]:
# LeNet: training vs. validation top-k (k=2) accuracy per epoch.
plt.plot(history.history["top_k_accuracy"])
plt.plot(history.history["val_top_k_accuracy"])
plt.title("Top K Accuracy V/S EPOCH")
plt.xlabel("Epoch")
plt.ylabel("Top K Accuracy")
plt.legend(["Train", "Val"])

# TESTING

In [None]:
# Show up to 16 validation images with their actual and LeNet-predicted classes.
# Class names come from CONFIGURATION so the cell works on a fresh kernel, and
# the whole grid is predicted in one call instead of one call per image.
plt.figure(figsize = (14, 14))
class_names = CONFIGURATION["CLASS_NAMES"]
for images, labels in val_dataset.take(1):
  n_shown = min(16, images.shape[0])
  predicted_ids = tf.argmax(lenet_model.predict(images[:n_shown]), axis=-1).numpy()
  true_ids = tf.argmax(labels[:n_shown], axis=-1).numpy()
  for i in range(n_shown):
    plt.subplot(4, 4, i+1)
    plt.imshow(images[i]/255)
    plt.title("Actual : " + class_names[true_ids[i]] + "\nPredicted : " + class_names[predicted_ids[i]])
    plt.axis("off")

# PERFORMANCE

## CONFUSION MATRIX

In [None]:
true_labels = []
predicted_labels = []

In [None]:
# Start from empty lists so re-running this cell does not append duplicates.
true_labels = []
predicted_labels = []

# Collect the class index of every validation sample and of the matching LeNet
# prediction. The model is called directly rather than via `predict` in a loop.
for batch_images, batch_labels in validation_dataset:
  true_labels.extend(tf.argmax(batch_labels, axis=-1).numpy().tolist())
  batch_predictions = lenet_model(batch_images, training=False)
  predicted_labels.extend(tf.argmax(batch_predictions, axis=-1).numpy().tolist())

In [None]:
print(true_labels)
print(len(true_labels))

In [None]:
print(predicted_labels)
print(len(predicted_labels))

In [None]:
cm = confusion_matrix(true_labels, predicted_labels)
cm

In [None]:
# Annotated heatmap of the confusion matrix (integer counts in each cell).
plt.figure(figsize=(8, 8))
sns.heatmap(cm, fmt="d", annot=True, linewidths=2)
plt.title("Confusion Matrix")
plt.ylabel("Actual")
plt.xlabel("Predicted")
plt.show()

# VISUALISING INTERMEDIATE CONVOLUTION LAYERS

In [None]:
vgg_backbone = tf.keras.applications.vgg16.VGG16(
    include_top = False,
    weights = "imagenet",
    input_shape = (CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"], 3)
)
vgg_backbone.summary()

In [None]:
def is_conv(layer_name):
  """Return True when `layer_name` contains the substring "conv", else False."""
  return "conv" in layer_name

In [None]:
feature_maps = [layer.output for layer in vgg_backbone.layers[1:] if is_conv(layer.name)]

In [None]:
feature_map_model = Model(
    inputs = vgg_backbone.input,
    outputs = feature_maps
)
feature_map_model.summary()

In [None]:
print(len(feature_maps))

In [None]:
# Read one test image, resize it to 224x224 and turn it into a float32 batch of one.
test_image = cv2.resize(
    cv2.imread("/content/dataset/Emotions Dataset/Emotions Dataset/test/happy/110020.jpg"),
    (224, 224),
)
im = tf.expand_dims(tf.constant(test_image, dtype=tf.float32), axis=0)

# One output array per selected convolution layer.
f_maps = feature_map_model.predict(im)

In [None]:
print(len(f_maps))

In [None]:
for i in range(len(f_maps)):
  print(f_maps[i].shape)

In [None]:
# Plot the feature maps of the first three convolution layers as three stacked
# rows of ONE figure. The figure is created once, outside the loop, so the
# 3x1 subplot grid is shared by all rows.
plt.figure(figsize=(14, 14))
for i in range(3):
  n_channels = f_maps[i].shape[3]
  size = f_maps[i].shape[1]

  # Lay the channels of this layer side by side in one wide image.
  joint_maps = np.ones((size, size*n_channels))
  for j in range(n_channels):
    joint_maps[:, size*j:size*(j+1)] = f_maps[i][..., j]

  plt.subplot(3, 1, i+1)
  # Only the first 448 columns are displayed.
  plt.imshow(joint_maps[:, 0:448])
  plt.axis("off")

# VISION TRANSFORMERS

In [None]:
test_image = cv2.imread("/content/dataset/Emotions Dataset/Emotions Dataset/train/happy/110028.jpg")

In [None]:
test_image = cv2.resize(test_image, (256, 256))

In [None]:
plt.imshow(test_image)

In [None]:
patches = tf.image.extract_patches(
    images=tf.expand_dims(test_image, axis=0),
    sizes=[1, 16, 16, 1],
    strides=[1, 16, 16, 1],
    rates=[1, 1, 1, 1],
    padding="VALID"
)

In [None]:
patches.shape

In [None]:
print(patches.shape)
patches = tf.reshape(patches, (1,256,768))
print(patches.shape)

In [None]:
# Draw every flattened patch as a 16x16 RGB tile on a 16x16 grid.
# The unused `im = patches[0][i][j]` lookup was removed: it relied on a `j`
# left over from an earlier cell.
plt.figure(figsize=(8, 8))
for i in range(patches.shape[1]):
    plt.subplot(16, 16, i+1)
    plt.imshow(tf.reshape(patches[0][i], (16,16,3)))
    plt.axis("off")


In [None]:
class PatchEncoder(Layer):
  """Split images into 16x16 patches, project them and add position embeddings.

  Args:
    N_PATCHES: number of patches per image (size of the position table).
    HIDDEN_SIZE: width of the linear projection and of the position embeddings.
  """

  def __init__(self, N_PATCHES, HIDDEN_SIZE):
    super(PatchEncoder, self).__init__(name="patch_encoder")
    self.N_PATCHES = N_PATCHES
    self.linear_projection = Dense(HIDDEN_SIZE)
    # One learned vector per patch position.
    self.positional_embedding = tf.keras.layers.Embedding(N_PATCHES, HIDDEN_SIZE)

  def call(self, inputs):
    patches = tf.image.extract_patches(
        images=inputs,
        sizes=[1, 16, 16, 1],
        strides=[1, 16, 16, 1],
        rates=[1, 1, 1, 1],
        padding="VALID"
    )
    # Flatten the patch grid to one sequence per image. The batch size is read
    # dynamically so the layer also works when it is unknown at trace time.
    patches = tf.reshape(patches, (tf.shape(patches)[0], -1, patches.shape[-1]))

    positions = tf.range(start=0, limit=self.N_PATCHES, delta=1)
    output = self.linear_projection(patches) + self.positional_embedding(positions)
    return output


# VIT WITH HUGGINGFACE

In [None]:
!pip install transformers

In [None]:
from transformers import ViTModel, ViTConfig

In [None]:
configuration = ViTConfig()

In [None]:
model = ViTModel(configuration)

In [None]:
print(model.config)

In [None]:
from transformers import ViTFeatureExtractor, TFViTModel

In [None]:
# Preprocessing for the HuggingFace ViT: resize, scale by 1/255, then permute
# the per-sample axes with (3, 1, 2) so channels come first.
resize_rescale_hf = tf.keras.Sequential()
resize_rescale_hf.add(Resizing(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]))
resize_rescale_hf.add(Rescaling(1./255))
resize_rescale_hf.add(Permute((3,1,2)))

In [None]:
# Pretrained ViT backbone wrapped in a Keras functional model with a 3-way softmax head.
base_model = TFViTModel.from_pretrained("google/vit-base-patch16-224-in21k")

inputs = Input(shape=(None,None,3))
vit_outputs = base_model.vit(resize_rescale_hf(inputs))
# First element of the ViT output, position 0 of the sequence axis
# (presumably the [CLS] token embedding; confirm against the model docs).
cls_embedding = vit_outputs[0][:, 0, :]
outputs = Dense(3, activation="softmax")(cls_embedding)

hf_model = tf.keras.Model(inputs=inputs, outputs=outputs)


In [None]:
test_image = cv2.imread("/content/dataset/Emotions Dataset/Emotions Dataset/train/happy/101883.jpg")
test_image = cv2.resize(test_image, (224, 224))

In [None]:
hf_model(tf.expand_dims(test_image, axis=0))

In [None]:
hf_model.summary()

In [None]:
# Fresh loss and metric objects for the ViT model (rebinds the shared names).
loss_function = CategoricalCrossentropy(from_logits=False)
metrics = [
    CategoricalAccuracy(name="accuracy"),
    TopKCategoricalAccuracy(k=2, name="top_k_accuracy"),
]
hf_optimizer = Adam(learning_rate=CONFIGURATION["LEARNING_RATE"])
hf_model.compile(loss=loss_function, metrics=metrics, optimizer=hf_optimizer)

In [None]:
class LogConfMatrix(Callback):
  """Log a W&B confusion matrix over `validation_dataset` after every epoch.

  Predictions come from `self.model`, the model the callback is attached to,
  so the callback is not tied to one particular global model.
  """

  def on_epoch_end(self, epoch, logs=None):
    true_labels = []
    predicted_labels = []
    for batch_images, batch_labels in validation_dataset:
      true_labels.extend(tf.argmax(batch_labels, axis=-1).numpy().tolist())
      batch_predictions = self.model(batch_images, training=False)
      predicted_labels.extend(tf.argmax(batch_predictions, axis=-1).numpy().tolist())

    cm = wandb.plot.confusion_matrix(
        y_true = true_labels,
        preds = predicted_labels,
        class_names = CONFIGURATION["CLASS_NAMES"]
    )
    wandb.log({"conf_mat": cm})


In [None]:
class LogResultsTable(Callback):
  """Log a W&B table (image, predicted class, true class) after every epoch.

  The table covers the first batch of `validation_dataset`. Predictions come
  from `self.model`, the model the callback is attached to, and the batch is
  predicted in one call instead of once per image.
  """

  def on_epoch_end(self, epoch, logs=None):
    class_names = CONFIGURATION["CLASS_NAMES"]
    val_table = wandb.Table(columns=["Image", "Predicted", "Label"])
    for batch_images, batch_labels in validation_dataset.take(1):
      predicted_ids = tf.argmax(self.model(batch_images, training=False), axis=-1).numpy()
      true_ids = tf.argmax(batch_labels, axis=-1).numpy()
      for image, predicted_id, true_id in zip(batch_images, predicted_ids, true_ids):
        val_table.add_data(wandb.Image(image), class_names[predicted_id], class_names[true_id])
    wandb.log({"Model Results": val_table})

In [None]:
# Train the ViT model for 3 epochs with W&B logging. Each callback is
# registered once, so the results table is logged a single time per epoch.
history_hf_model = hf_model.fit(
    training_dataset,
    validation_data=validation_dataset,
    epochs = 3,
    verbose=1,
    callbacks=[WandbCallback(), LogConfMatrix(), LogResultsTable()]
    )

In [None]:
hf_model.evaluate(validation_dataset)

LOSS

In [None]:
# ViT: training vs. validation loss per epoch.
for curve in ("loss", "val_loss"):
  plt.plot(history_hf_model.history[curve])
plt.title("LOSS V/S EPOCH")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Train", "Val"])

ACCURACY

In [None]:
# ViT: training vs. validation accuracy per epoch.
# The title and y-axis label name the plotted quantity (accuracy, not loss).
plt.plot(history_hf_model.history["accuracy"])
plt.plot(history_hf_model.history["val_accuracy"])
plt.title("ACCURACY V/S EPOCH")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(["Train", "Val"])

In [None]:
hf_model.evaluate(validation_dataset)

# Saving And Loading Model (Google Drive)

In [None]:
hf_model.save("vit_finetuned")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!cp -r /content/drive/MyDrive/MyViTModel/ /content/vit_finetuned/

In [None]:
my_vit_model = tf.keras.models.load_model("/content/vit_finetuned")

In [None]:
my_vit_model.evaluate(validation_dataset)

# Exporting To ONNX Format

## Installation

In [None]:
!pip install -U tf2onnx
!pip install onnxruntime

## Conversion From tensorflow to onnx

In [None]:
!python -m tf2onnx.convert --saved-model vit_finetuned/ --output vit_onnx.onnx

In [None]:
!cp -r /content/drive/MyDrive/vit_onnx.onnx /content/vit_onnx.onnx

## Predicting Using Onnx Model

In [None]:
import onnxruntime as rt

In [None]:
!pip install onnx

In [None]:
import onnx
model = onnx.load("/content/vit_onnx.onnx")

In [42]:
import onnxruntime as ort
session = ort.InferenceSession("/content/vit_onnx.onnx")

In [43]:
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

In [44]:
# Read the image, resize it to 224x224 and build a float32 batch of one.
input_data = cv2.resize(cv2.imread("/content/111073.jpg"), (224, 224))
input_data = np.expand_dims(input_data.astype(np.float32), axis=0)

In [45]:
output = session.run([output_name], {input_name: input_data})

In [46]:
print(output)

[array([[1.7842471e-04, 9.9974543e-01, 7.6104137e-05]], dtype=float32)]


# Creating Web Interface Using Gradio

In [28]:
!pip install gradio

Collecting gradio
  Downloading gradio-3.35.2-py3-none-any.whl (19.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.7/19.7 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles (from gradio)
  Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.99.1-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.4/58.4 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.0.tar.gz (4.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client>=0.2.7 (from gradio)
  Downloading gradio_client-0.2.7-py3-none-any.whl (288 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m288.4/288.4 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx (from gradio)
  Downloading httpx-0.24.1-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [29]:
import gradio as gr

In [31]:
!pip install onnx
!pip install onnxruntime



In [30]:
import onnxruntime as rt

In [32]:
import onnx
model = onnx.load("/content/vit_onnx.onnx")

In [33]:
import onnxruntime as ort
session = ort.InferenceSession("/content/vit_onnx.onnx")

In [34]:
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

In [69]:
CLASS_NAMES = ["Angry", "Happy", "Sad"]

def predict_image(im):
  """Run the ONNX session on one image and return {class name: probability}.

  `im` is cast to float32 and given a leading batch axis before inference.
  """
  batch = tf.expand_dims(tf.cast(im, tf.float32), axis=0).numpy()
  prediction = session.run([output_name], {input_name: batch})
  # First requested output, first (only) sample of the batch.
  probabilities = prediction[0][0]
  return {name: float(probabilities[idx]) for idx, name in enumerate(CLASS_NAMES)}

In [71]:
# Gradio demo: 224x224 image in, class probabilities out.
# Uses the top-level components (`gr.Image`, `gr.Label`) in place of the
# deprecated `gr.inputs` / `gr.outputs` namespaces, drops the deprecated
# `capture_session` argument, and passes `debug` as a real boolean.
image = gr.Image(shape=(224, 224))
label = gr.Label(num_top_classes=3)
iface = gr.Interface(fn=predict_image, inputs=image, outputs=label)
iface.launch(debug=True)

  super().__init__(
  super().__init__(num_top_classes=num_top_classes, type=type, label=label)
  iface = gr.Interface(fn=predict_image, inputs=image, outputs=label, capture_session=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

Keyboard interruption in main thread... closing server.


