In [None]:
# Install the Kaggle command-line client (quiet output); it is used below to download the dataset.
!pip install -q kaggle

In [None]:
# Copy the Kaggle API token (kaggle.json in the working directory) into ~/.kaggle.
# -p keeps this cell re-runnable: a plain `mkdir` fails when the directory already exists.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the human-emotions dataset archive with the Kaggle CLI.
!kaggle datasets download -d muhammadhananasghar/human-emotions-datasethes

In [None]:
# Extract the downloaded archive into /content/dataset/.
!unzip /content/human-emotions-datasethes.zip -d "/content/dataset/"

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import InputLayer, Conv2D, BatchNormalization, MaxPool2D, Flatten, Dense, Dropout, RandomRotation, RandomFlip, RandomContrast, Resizing, Rescaling, MaxPooling2D, GlobalAveragePooling2D, Add, Activation, Input, Embedding, LayerNormalization, MultiHeadAttention, Permute
from tensorflow.keras.regularizers import L2
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy, TopKCategoricalAccuracy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow_probability as tfp
from tensorflow.train import BytesList, FloatList, Int64List
from tensorflow.train import Example, Features, Feature
import matplotlib.pyplot as plt
import cv2
import numpy as np
import sklearn
from sklearn.metrics import confusion_matrix, roc_curve
import seaborn as sns

In [None]:
# Locations of the extracted train/test image folders; the test folder is used as the validation split.
train_directory = "/content/dataset/Emotions Dataset/Emotions Dataset/train"
val_directory = "/content/dataset/Emotions Dataset/Emotions Dataset/test"
# Class order; it fixes the index of each class in the categorical label vectors.
CLASS_NAMES = ["angry", "happy", "sad"]

# Hyper-parameters shared by the data pipeline and the models defined in this notebook.
CONFIGURATION = {
    "BATCH_SIZE": 32,
    "IM_SIZE": 256,
    "LEARNING_RATE": 0.001,
    "N_EPOCHS": 20,
    "DROPOUT_RATE": 0.0,
    "REGULARIZATION_RATE": 0.0,
    "N_FILTERS": 6 ,
    "KERNEL_SIZE": 3,
    "N_STRIDES": 1,
    "POOL_SIZE":2,
    "N_DENSE_1": 100,
    "N_DENSE_2": 10,
    "NUM_CLASSES":3
}

In [None]:
# Training split: batched RGB images resized to IM_SIZE x IM_SIZE, with
# label_mode="categorical" labels whose class order follows CLASS_NAMES.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_directory,
    labels="inferred",
    label_mode="categorical",
    class_names=CLASS_NAMES,
    color_mode="rgb",
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]),
    shuffle=True,
    seed=99
)

# Validation split: same settings, read from the dataset's test folder.
val_dataset = tf.keras.utils.image_dataset_from_directory(
    val_directory,
    labels="inferred",
    label_mode="categorical",
    class_names=CLASS_NAMES,
    color_mode="rgb",
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]),
    shuffle=True,
    seed=99
)

In [None]:
# Print one (images, labels) batch from the validation dataset to inspect it.
for i in val_dataset.take(1):
  print(i)

In [None]:
# Show the first 16 images of one training batch in a 4x4 grid, titled with their class names.
plt.figure(figsize=(12,12))

for image, label in train_dataset.take(1):
  for i in range(16):
    ax=plt.subplot(4,4,i+1)
    # Pixels are divided by 255 so imshow receives floats in [0, 1].
    plt.imshow(image[i]/255.)
    # The label is a categorical vector; argmax recovers the class index.
    plt.title(CLASS_NAMES[tf.argmax(label[i], axis=0).numpy()])
    plt.axis("off")

In [None]:
# Augmentation stack: small random rotation, random horizontal flip, mild random contrast change.
augment_layers = tf.keras.Sequential([
    RandomRotation(factor=(-0.025, 0.025)),
    RandomFlip(mode='horizontal'),
    RandomContrast(factor=0.1)
])

def augment_layer(image, label):
  """Apply the `augment_layers` stack to `image` in training mode; `label` is returned unchanged."""
  augmented = augment_layers(image, training=True)
  return augmented, label

In [None]:
def box(lamda):
  """Sample a random square-ish box inside the IM_SIZE x IM_SIZE image for CutMix.

  Args:
    lamda: mixing ratio; the box side is IM_SIZE * sqrt(1 - lamda) before clipping.

  Returns:
    Tuple (r_x, r_y, r_w, r_h): top-left corner of the box and its extent along
    x and y, as int32 values. Each extent is at least 1 and, measured from its
    own corner coordinate, never reaches past IM_SIZE.
  """
  im_size = CONFIGURATION["IM_SIZE"]

  # Random box position, uniform over the image (x is sampled first, then y).
  c_x = tf.cast(tfp.distributions.Uniform(0, im_size).sample(1)[0], dtype=tf.int32)
  c_y = tf.cast(tfp.distributions.Uniform(0, im_size).sample(1)[0], dtype=tf.int32)

  # Nominal side length of the box.
  side = tf.cast(im_size*tf.math.sqrt(1-lamda), dtype=tf.int32)

  # Top-left corner, clipped to the image.
  r_x = tf.clip_by_value(c_x - side//2, 0, im_size)
  r_y = tf.clip_by_value(c_y - side//2, 0, im_size)

  # Bottom-right corner, clipped to the image.
  x_b_r = tf.clip_by_value(r_x + side//2, 0, im_size)
  y_b_r = tf.clip_by_value(r_y + side//2, 0, im_size)

  # Each extent is measured along its own axis, so corner + extent stays within
  # the image (pairing r_x with the y extent could run past the border).
  # tf.maximum keeps a degenerate box at one pixel without a Python `if` on a tensor.
  r_w = tf.maximum(x_b_r - r_x, 1)
  r_h = tf.maximum(y_b_r - r_y, 1)

  return r_x, r_y, r_w, r_h

In [None]:
def cut_mixup(train_dataset_1, train_dataset_2):
  """Combine two (image, label) elements CutMix-style.

  A box sampled by `box` is cut out of `image_2` and pasted at the same
  position into `image_1`; the labels are blended according to the fraction
  of the image area the box covers.

  Args:
    train_dataset_1: (image_1, label_1) pair.
    train_dataset_2: (image_2, label_2) pair.

  Returns:
    (image, label): the mixed image and the float64 blended label.
  """
  image_1, label_1 = train_dataset_1
  image_2, label_2 = train_dataset_2
  im_size = CONFIGURATION["IM_SIZE"]

  # Mixing ratio drawn from Beta(0.2, 0.2).
  mix_ratio = tfp.distributions.Beta(0.2, 0.2).sample(1)[0]

  # The four values from `box` are used positionally as
  # (offset_height, offset_width, target_height, target_width) below.
  r_y, r_x, r_h, r_w = box(mix_ratio)

  def _isolated_patch(img):
    # Keep only the box region of `img`, zero everywhere else, at full image size.
    cropped = tf.image.crop_to_bounding_box(img, r_y, r_x, r_h, r_w)
    return tf.image.pad_to_bounding_box(cropped, r_y, r_x, im_size, im_size)

  patch_2 = _isolated_patch(image_2)
  patch_1 = _isolated_patch(image_1)

  # Remove the box region from image_1 and fill it with image_2's pixels.
  image = image_1 - patch_1 + patch_2

  # Recompute the ratio from the actual (clipped) box area.
  keep_ratio = 1 - (r_h*r_w)/(im_size*im_size)
  label = keep_ratio*tf.cast(label_1, dtype=tf.float64) + (1-keep_ratio)*tf.cast(label_2, dtype=tf.float64)

  return image, label

In [None]:
# Two independently shuffled, augmented views of the (batched) training dataset.
train_dataset_1  = train_dataset.shuffle(buffer_size=8, reshuffle_each_iteration=True).map(augment_layer)
train_dataset_2  = train_dataset.shuffle(buffer_size=8, reshuffle_each_iteration=True).map(augment_layer)

# Each element pairs one batch from each view; this is the input structure cut_mixup unpacks.
mixed_dataset = tf.data.Dataset.zip((train_dataset_1, train_dataset_2))


In [None]:
# Active training pipeline: augmentation only, mapped in parallel and prefetched.
training_dataset = (train_dataset.map(augment_layer, num_parallel_calls=tf.data.AUTOTUNE).prefetch(tf.data.AUTOTUNE))
# Alternative (disabled): CutMix pipeline built from mixed_dataset.
# training_dataset = (mixed_dataset.map(cut_mixup, num_parallel_calls=tf.data.AUTOTUNE).prefetch(tf.data.AUTOTUNE))
# Validation data is only prefetched, never augmented.
validation_dataset = (val_dataset.prefetch(tf.data.AUTOTUNE))

# Alternative (disabled): the same pipelines without prefetching.
# training_dataset = train_dataset.map(augment_layer, num_parallel_calls=tf.data.AUTOTUNE)
# validation_dataset = (val_dataset)

In [None]:
# Show the first 16 augmented images of one training batch in a 4x4 grid with their class names.
plt.figure(figsize=(12,12))

for image, label in training_dataset.take(1):
  for i in range(16):
    ax=plt.subplot(4,4,i+1)
    # Pixels are divided by 255 so imshow receives floats in [0, 1].
    plt.imshow(image[i]/255.)
    plt.title(CLASS_NAMES[tf.argmax(label[i], axis=0).numpy()])
    plt.axis("off")

In [None]:
# Split the batches back into single (image, label) elements for the TFRecord export below.
# NOTE(review): this rebinds the same names that `model.fit(...)` and `vit.fit(...)` use further
# down, so after this cell has run those calls receive unbatched elements, and running this
# cell twice unbatches twice. Consider separate names for the unbatched datasets - confirm intent.
training_dataset = training_dataset.unbatch()
validation_dataset = validation_dataset.unbatch()

In [None]:
def create_example(image, label):
  """Serialise one (image, label) pair as a tf.train.Example.

  Args:
    image: value stored in the bytes feature 'images'.
    label: value stored in the int64 feature 'labels'.

  Returns:
    The serialised Example as a byte string.
  """
  feature_map = {
      'images': Feature(bytes_list=BytesList(value=[image])),
      'labels': Feature(int64_list=Int64List(value=[label])),
  }
  record = Example(features=Features(feature=feature_map))
  return record.SerializeToString()

In [None]:
# Number of TFRecord files the dataset is split into.
NUM_SHARDS = 10
# File-name template for the shards; formatted with the zero-padded shard index.
PATH = 'tfrecords/shard_{:02d}.tfrecord'

In [None]:
# def encode_image(image, label):
#   image = tf.image.convert_image_dtype(image, dtype=tf.uint8)
#   image = tf.io.encode_jpeg(image)
#   return image,label

def encode_image(image, label):
  """JPEG-encode an image and reduce its categorical label to a class index.

  Args:
    image: image tensor; float inputs are treated as being on a 0-255 scale
      (the scale this notebook plots with `image/255.`).
    label: categorical label vector.

  Returns:
    (jpeg_bytes, class_index): scalar string tensor and int32 class index.
  """
  if image.dtype.is_floating:
    # tf.image.convert_image_dtype expects floats in [0, 1] and multiplies by
    # 255 on the way to uint8, which saturates 0-255 floats to white. Round and
    # saturate-cast instead so the pixel values are kept.
    image = tf.saturate_cast(tf.round(image), tf.uint8)
  else:
    image = tf.image.convert_image_dtype(image, dtype=tf.uint8)
  image = tf.io.encode_jpeg(image)
  # Ask argmax for the integer dtype directly rather than calling int() on a tensor,
  # which only works when autograph rewrites the call.
  return image, tf.argmax(label, output_type=tf.int32)

In [None]:
# Apply encode_image to every element of training_dataset, giving (jpeg_bytes, class_index) pairs.
encoded_dataset = (
  training_dataset
  .map(encode_image)
)

In [None]:
# Write encoded_dataset to NUM_SHARDS TFRecord files; shard k receives every
# NUM_SHARDS-th element starting at offset k.

# The writer does not create missing parent directories, so make sure the
# directory part of PATH exists before opening the first shard.
shard_dir = PATH.rpartition("/")[0]
if shard_dir:
  tf.io.gfile.makedirs(shard_dir)

for shard_number in range(NUM_SHARDS):

  sharded_dataset = (
      encoded_dataset
      .shard(NUM_SHARDS, shard_number)
      .as_numpy_iterator()
  )

  with tf.io.TFRecordWriter(PATH.format(shard_number)) as file_writer:
    for encoded_image, encoded_label in sharded_dataset:

      example = create_example(encoded_image, encoded_label)
      file_writer.write(example)

In [None]:
# Read all shard files back as one dataset of serialised examples.
recons_dataset = tf.data.TFRecordDataset(filenames=[PATH.format(p) for p in range(NUM_SHARDS)])

In [None]:
def parse_tfrecords(example):
  """Decode one serialised example into an (image, label) pair.

  Args:
    example: serialised tf.train.Example with a string feature 'images'
      (JPEG bytes) and an int64 feature 'labels'.

  Returns:
    (image, label): the JPEG decoded to 3 channels, and the int64 label.
  """
  schema = {
      'images': tf.io.FixedLenFeature([], tf.string),
      'labels': tf.io.FixedLenFeature([], tf.int64)
  }

  parsed = tf.io.parse_single_example(example, schema)
  image = tf.io.decode_jpeg(parsed['images'], channels=3)

  return image, parsed['labels']

In [None]:
# Decode the records, then batch and prefetch. Labels here are the int64 class indices
# returned by parse_tfrecords, not categorical vectors.
parsed_dataset = recons_dataset.map(parse_tfrecords).batch(CONFIGURATION["BATCH_SIZE"]).prefetch(tf.data.AUTOTUNE)

In [None]:
# Display the dataset object produced by take(1); this does not iterate over it.
parsed_dataset.take(1)

In [None]:
# Preprocessing stack: resize to IM_SIZE x IM_SIZE, then multiply pixel values by 1/255.
resize_rescale_layer = tf.keras.Sequential([
    Resizing(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]),
    Rescaling(1./255)
])

In [None]:
# LeNet-style CNN: resize/rescale, two Conv -> BatchNorm -> MaxPool stages, then two
# Dense -> BatchNorm stages and a softmax over NUM_CLASSES. Layer sizes, dropout and
# L2 rates all come from CONFIGURATION.
lenet_model = tf.keras.Sequential([
    # Accept images of any spatial size; resize_rescale_layer brings them to IM_SIZE.
    InputLayer(input_shape=(None, None, 3)),
    resize_rescale_layer,

    Conv2D(filters = CONFIGURATION["N_FILTERS"], kernel_size = CONFIGURATION["KERNEL_SIZE"],
           strides = CONFIGURATION["N_STRIDES"], padding = 'valid', activation = 'relu', kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"])),
    BatchNormalization(),
    MaxPool2D(pool_size = CONFIGURATION["POOL_SIZE"], strides =  CONFIGURATION["N_STRIDES"] * 2),
    Dropout(rate=CONFIGURATION["DROPOUT_RATE"]),

    Conv2D(filters = CONFIGURATION["N_FILTERS"]* 2 + 4, kernel_size = CONFIGURATION["KERNEL_SIZE"],
           strides = CONFIGURATION["N_STRIDES"], padding = 'valid', activation = 'relu', kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"])),
    BatchNormalization(),
    MaxPool2D(pool_size = CONFIGURATION["POOL_SIZE"], strides =  CONFIGURATION["N_STRIDES"] * 2),

    Flatten(),
    Dense(CONFIGURATION["N_DENSE_1"], activation = 'relu', kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"])),
    BatchNormalization(),
    Dropout(CONFIGURATION["DROPOUT_RATE"]),
    Dense(CONFIGURATION["N_DENSE_2"], activation = 'relu', kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"])),
    BatchNormalization(),

    Dense(CONFIGURATION["NUM_CLASSES"], activation = 'softmax')
])

lenet_model.summary()

In [None]:
class CustomConv2D(Layer):
  """Conv2D with ReLU activation followed by BatchNormalization.

  Args:
    n_filters: number of convolution filters.
    kernel_size: convolution kernel size.
    n_strides: convolution stride.
    padding: convolution padding mode ('valid' by default).
    **kwargs: forwarded to the base Layer (e.g. `name`).
  """
  def __init__(self, n_filters, kernel_size, n_strides, padding = 'valid', **kwargs):
    # No fixed name is passed: this layer is instantiated many times per model,
    # and a shared hard-coded name makes every instance indistinguishable.
    super(CustomConv2D, self).__init__(**kwargs)

    self.conv = Conv2D(
        filters = n_filters,
        kernel_size = kernel_size,
        activation = 'relu',
        strides = n_strides,
        padding = padding)

    self.batch_norm = BatchNormalization()

  def call(self, x, training = None):
    """Apply the convolution, then batch normalisation.

    `training` defaults to None so that Keras decides the mode; a default of
    True would run batch normalisation in training mode on plain `layer(x)` calls.
    """
    x = self.conv(x)
    x = self.batch_norm(x, training = training)

    return x

In [None]:
class ResidualBlock(Layer):
  """Two 3x3 CustomConv2D layers with a skip connection and a final ReLU.

  Args:
    n_channels: number of filters in every convolution of the block.
    n_strides: stride of the first convolution; when it is not 1 the skip
      path goes through a 1x1 CustomConv2D with the same stride so that both
      branches have matching shapes.
    **kwargs: forwarded to the base Layer (e.g. `name`).
  """
  def __init__(self, n_channels, n_strides = 1, **kwargs):
    # No fixed name is passed: the block is instantiated many times per model.
    super(ResidualBlock, self).__init__(**kwargs)

    self.dotted = (n_strides != 1)

    self.custom_conv_1 = CustomConv2D(n_channels, 3, n_strides, padding = "same")
    self.custom_conv_2 = CustomConv2D(n_channels, 3, 1, padding = "same")

    self.activation = Activation('relu')
    # Built once here rather than creating a new Add layer on every call.
    self.add = Add()

    if self.dotted:
      self.custom_conv_3 = CustomConv2D(n_channels, 1, n_strides)

  def call(self, input, training = None):
    """Run the block on `input`; `training` is forwarded to the inner conv layers."""
    x = self.custom_conv_1(input, training = training)
    x = self.custom_conv_2(x, training = training)

    if self.dotted:
      # Projection shortcut for the strided case.
      shortcut = self.custom_conv_3(input, training = training)
    else:
      # Identity shortcut.
      shortcut = input

    return self.activation(self.add([x, shortcut]))


In [None]:
class ResNet34(Model):
  """ResNet-34-style classifier built from CustomConv2D and ResidualBlock.

  Layout: 7x7 stride-2 stem convolution, 3x3 stride-2 max pooling, then
  residual stages of 3, 4, 6 and 3 blocks with 64, 128, 256 and 512 channels
  (the first block of stages 3-5 uses stride 2), global average pooling and a
  softmax layer with CONFIGURATION["NUM_CLASSES"] units.
  """
  def __init__(self,):
    super(ResNet34, self).__init__(name = 'resnet_34')

    self.conv_1 = CustomConv2D(64, 7, 2, padding = 'same')
    self.max_pool = MaxPooling2D(3,2)

    self.conv_2_1 = ResidualBlock(64)
    self.conv_2_2 = ResidualBlock(64)
    self.conv_2_3 = ResidualBlock(64)

    self.conv_3_1 = ResidualBlock(128, 2)
    self.conv_3_2 = ResidualBlock(128)
    self.conv_3_3 = ResidualBlock(128)
    self.conv_3_4 = ResidualBlock(128)

    self.conv_4_1 = ResidualBlock(256, 2)
    self.conv_4_2 = ResidualBlock(256)
    self.conv_4_3 = ResidualBlock(256)
    self.conv_4_4 = ResidualBlock(256)
    self.conv_4_5 = ResidualBlock(256)
    self.conv_4_6 = ResidualBlock(256)

    self.conv_5_1 = ResidualBlock(512, 2)
    self.conv_5_2 = ResidualBlock(512)
    self.conv_5_3 = ResidualBlock(512)

    self.global_pool = GlobalAveragePooling2D()

    self.fc_3 = Dense(CONFIGURATION["NUM_CLASSES"], activation = 'softmax')

  def call(self, x, training = None):
    """Forward pass.

    Args:
      x: batch of input images.
      training: forwarded to every convolutional block. Defaults to None so
        that Keras decides the mode; a default of True would keep batch
        normalisation in training mode on plain `model(x)` calls.

    Returns:
      Softmax class probabilities.
    """
    # The stem receives `training` explicitly, like every residual block below.
    x = self.conv_1(x, training = training)
    x = self.max_pool(x)

    residual_blocks = (
        self.conv_2_1, self.conv_2_2, self.conv_2_3,
        self.conv_3_1, self.conv_3_2, self.conv_3_3, self.conv_3_4,
        self.conv_4_1, self.conv_4_2, self.conv_4_3,
        self.conv_4_4, self.conv_4_5, self.conv_4_6,
        self.conv_5_1, self.conv_5_2, self.conv_5_3,
    )
    for block in residual_blocks:
      x = block(x, training = training)

    x = self.global_pool(x)

    return self.fc_3(x)

In [None]:
# Instantiate the network and call it once on a dummy 256x256 RGB batch so that
# its weights are created and summary() can report them.
resnet_34 = ResNet34()
resnet_34(tf.zeros([1, 256, 256, 3]), training = True) # builds the model so that summary() works
resnet_34.summary()

In [None]:
# EfficientNetB4 with ImageNet weights and without its classification head,
# for IM_SIZE x IM_SIZE RGB inputs.
backbone = tf.keras.applications.efficientnet.EfficientNetB4(
    include_top=False,
    weights="imagenet",
    input_shape=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"], 3),
)

In [None]:
# Freeze the backbone so only the layers added on top of it are trained.
backbone.trainable = False
# Alternative (disabled): unfreeze the backbone for fine-tuning.
# backbone.trainable = True

In [None]:
# Transfer-learning classifier (Sequential form): backbone, global average pooling,
# two dense layers with a BatchNormalization in between, and a softmax output.
# NOTE(review): `model` is rebound by the functional definition in the next cell.
model = tf.keras.Sequential([
    Input(shape=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"], 3)),
    backbone,
    GlobalAveragePooling2D(),
    Dense(CONFIGURATION["N_DENSE_1"], activation = 'relu'),
    BatchNormalization(),
    Dense(CONFIGURATION["N_DENSE_2"], activation = 'relu'),
    Dense(CONFIGURATION["NUM_CLASSES"], activation = 'softmax')
])

model.summary()

In [None]:
# Same classifier written with the functional API; this definition replaces `model`.
# NOTE(review): the name `input` shadows Python's built-in input() for the rest of the notebook.
input = Input(shape=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"], 3))

# The backbone is called with training=False.
x = backbone(input, training = False)

x = GlobalAveragePooling2D()(x)
x = Dense(CONFIGURATION["N_DENSE_1"], activation = 'relu')(x)
x = BatchNormalization()(x)
x = Dense(CONFIGURATION["N_DENSE_2"], activation = 'relu')(x)
x = Dense(CONFIGURATION["NUM_CLASSES"], activation = 'softmax')(x)

model = Model(input, x)
model.summary()

In [None]:
# Checkpoint callback that saves to 'best_weights' only when val_accuracy reaches a new maximum.
# NOTE(review): none of the fit() calls visible in this notebook pass this callback - confirm
# whether it should be added to `callbacks=`.
checkpoint_callback = ModelCheckpoint(
    'best_weights',
    monitor='val_accuracy',
    mode = 'max',
    verbose=1,
    save_best_only=True,
    )

In [None]:
# Loss and metrics shared by every compile() call below: categorical cross-entropy,
# accuracy and top-2 accuracy.
loss_function = CategoricalCrossentropy()
metrics = [CategoricalAccuracy(name="accuracy"), TopKCategoricalAccuracy(k=2, name="top_k_accuracy")]

In [None]:
# Number of samples per class, in CLASS_NAMES order.
n_sample_0 = 1525 # angry
n_sample_1 = 3019 # happy
n_sample_2 = 2255 # sad

In [None]:
# Per-class loss weights keyed by class index: 6799 (the total number of inputs)
# divided by that class's sample count, so rarer classes weigh more.
class_weights = {
    class_index: 6799/sample_count
    for class_index, sample_count in enumerate((n_sample_0, n_sample_1, n_sample_2))
}

In [None]:
# Compile the transfer-learning model with Adam at LEARNING_RATE/100.
model.compile(optimizer=Adam(learning_rate=CONFIGURATION["LEARNING_RATE"]/100), # use a smaller learning rate when fine-tuning a pretrained model
                    loss=loss_function,
                    metrics=metrics)

In [None]:
# Train for N_EPOCHS with the per-class loss weights; `history` feeds the plots below.
# NOTE(review): `training_dataset` / `validation_dataset` are rebound to unbatched datasets by an
# earlier cell, and `checkpoint_callback` is not passed here - confirm both are intended.
history = model.fit(training_dataset, validation_data=validation_dataset, epochs=CONFIGURATION["N_EPOCHS"], verbose=1, class_weight=class_weights)

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(["train_loss", "val_loss"])
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(["train_accuracy", "val_accuracy"])
plt.show()

In [None]:
# Evaluate the model that was compiled and fitted above. `lenet_model` is never
# compiled or trained in this notebook, and `validation_dataset` has been unbatched
# by an earlier cell, so the batched `val_dataset` is used.
model.evaluate(val_dataset)

In [None]:
# Classify a single image from the test folder with the trained model.
test_image = cv2.imread("/content/dataset/Emotions Dataset/Emotions Dataset/test/happy/109046.jpg_brightness_1.jpg")
# cv2.imread returns channels in BGR order, while the datasets were loaded with color_mode="rgb".
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
# `model` has a fixed IM_SIZE x IM_SIZE input.
test_image = cv2.resize(test_image, (CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]))

im = tf.constant(test_image, dtype=tf.float32)
im = tf.expand_dims(im, axis=0)

CLASS_NAMES[(tf.argmax(model(im, training=False), axis=-1)).numpy()[0]]

In [None]:
# Show up to 16 validation images with their predicted and true class names.
plt.figure(figsize=(12,12))

for image, label in val_dataset.take(1):
  # Predict the whole batch once, so each tile is titled with the prediction for
  # its own image rather than for the standalone test image `im`.
  predicted_classes = tf.argmax(model(image, training=False), axis=-1).numpy()
  for i in range(min(16, image.shape[0])):
    ax=plt.subplot(4,4,i+1)
    plt.imshow(image[i]/255.)
    plt.title("Predicted Label:" + CLASS_NAMES[predicted_classes[i]] + "\n" + "True Label:" + CLASS_NAMES[tf.argmax(label[i], axis=0).numpy()])
    plt.axis("off")

In [None]:
# Collect categorical labels and predicted probabilities for every validation batch.
labels = []
predicted = []

for image, label in val_dataset:
  # Use the trained `model` in inference mode and store plain numpy arrays.
  predicted.append(model(image, training=False).numpy())
  labels.append(label.numpy())

In [None]:
# Print true and predicted class indices for the whole validation set. Concatenating the
# per-batch arrays along axis 0 works for any number of batches and any final batch size.
print(np.concatenate([np.asarray(batch) for batch in labels]).argmax(axis=-1))
print(np.concatenate([np.asarray(batch) for batch in predicted]).argmax(axis=-1))

In [None]:
# Flat arrays of true / predicted class indices for the whole validation set. Concatenating
# the per-batch arrays along axis 0 works for any number of batches and any final batch size.
label = np.concatenate([np.asarray(batch) for batch in labels]).argmax(axis=-1)
pred  = np.concatenate([np.asarray(batch) for batch in predicted]).argmax(axis=-1)

In [None]:
# Confusion matrix of true vs. predicted class indices, printed and drawn as a heatmap.
cm = confusion_matrix(label, pred)
print(cm)

plt.figure(figsize=(8,8))

sns.heatmap(cm, annot=True)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")

In [None]:
# VGG16 with ImageNet weights and without its classification head; used below to
# visualise intermediate feature maps.
vgg_backbone = tf.keras.applications.vgg16.VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=(CONFIGURATION["IM_SIZE"],CONFIGURATION["IM_SIZE"],3),
)

In [None]:
# List the VGG16 layers and their output shapes.
vgg_backbone.summary()

In [None]:
def is_conv(layer_name):
  """Return True when `layer_name` contains the substring "conv", False otherwise."""
  return "conv" in layer_name

In [None]:
# Outputs of every VGG16 layer (skipping the first layer) whose name contains "conv".
feature_maps = [layer.output for layer in vgg_backbone.layers[1:] if is_conv(layer.name)]
# Model mapping the VGG16 input to all of those outputs at once.
feature_map_model = Model(inputs = vgg_backbone.input, outputs=feature_maps)

feature_map_model.summary()

In [None]:
# Load one test image, resize it to IM_SIZE x IM_SIZE and compute its VGG16 feature maps.
# NOTE(review): cv2.imread returns BGR channel order and no VGG preprocessing is applied - confirm intended.
test_image = cv2.imread("/content/dataset/Emotions Dataset/Emotions Dataset/test/happy/100610.jpg_brightness_1.jpg")
test_image = cv2.resize(test_image, (CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]))

# Add a leading batch axis.
im = tf.constant(test_image, dtype=tf.float32)
im = tf.expand_dims(im, axis=0)

f_maps = feature_map_model.predict(im)

In [None]:
# Number of feature-map outputs (one per selected conv layer).
len(f_maps)

In [None]:
# Print the shape of each feature-map output.
for i in range(len(f_maps)):
  print(f_maps[i].shape)

In [None]:
# For every conv layer, lay its channels side by side in one wide array and show the
# first 512 columns of it.
# NOTE(review): a new 256x256-inch figure is created on every iteration, which is very
# memory-hungry, and all channels are copied although only 512 columns are displayed.
for i in range(len(f_maps)):
  plt.figure(figsize=(256,256))
  f_size = f_maps[i].shape[1]
  n_channels = f_maps[i].shape[3]
  # One f_size-wide column block per channel.
  joint_maps = np.ones((f_size, f_size*n_channels))

  axs = plt.subplot(len(f_maps), 1, i+1)
  for j in range(n_channels):
    joint_maps[:, f_size*j:f_size*(j+1)] = f_maps[i][...,j]

  plt.imshow(joint_maps[:,0:512])
  plt.axis("off")

In [None]:
# Frozen EfficientNetB5 (ImageNet weights, no classification head) used for the Grad-CAM section.
# NOTE(review): this rebinds `backbone`, which previously referred to the EfficientNetB4 used by `model`.
backbone = tf.keras.applications.efficientnet.EfficientNetB5(
    include_top = False,
    weights='imagenet',
    input_shape=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"], 3),
    )
backbone.trainable=False

In [None]:
# Classification head built directly on the backbone's output tensor, so the backbone's
# layers are part of pretrained_model's own graph.
x = backbone.output

x = GlobalAveragePooling2D()(x)
x = Dense( CONFIGURATION["N_DENSE_1"], activation = "relu")(x)
x = Dense( CONFIGURATION["N_DENSE_2"], activation = "relu")(x)
output = Dense( CONFIGURATION["NUM_CLASSES"], activation = "softmax")(x)

pretrained_model = Model(backbone.inputs, output)
pretrained_model.summary()

In [None]:
# Load one test image, resize it to IM_SIZE x IM_SIZE and add a batch axis.
# NOTE(review): cv2.imread returns BGR channel order - confirm the models expect that.
test_image = cv2.imread("/content/dataset/Emotions Dataset/Emotions Dataset/test/happy/109046.jpg_brightness_1.jpg")
test_image = cv2.resize(test_image, (CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]))

im = tf.constant(test_image, dtype=tf.float32)
im = tf.expand_dims(im, axis=0)


In [None]:
# Class probabilities for the test image from `model`.
preds = model.predict(im)

In [None]:
# Sub-model mapping the input image to the output of the layer named "top_activation",
# which the Grad-CAM steps below use as the last convolutional feature map.
last_conv_layer_name = "top_activation"
last_conv_layer = pretrained_model.get_layer(last_conv_layer_name)
last_conv_layer_model = Model(pretrained_model.inputs, last_conv_layer.output)

In [None]:
# Inspect the feature-extraction sub-model.
last_conv_layer_model.summary()

In [None]:
# Print every layer name of pretrained_model (used to find the classifier-head layer names).
for layer in pretrained_model.layers:
  print(layer.name)

In [None]:
# Names of the classifier-head layers, i.e. every layer of pretrained_model that comes
# after `last_conv_layer`, in model order. They are read from the model because the
# auto-generated names (numeric suffixes such as "dense_15") depend on how many layers
# the kernel has created so far and change between runs.
classifier_layer_names = [
    layer.name
    for layer in pretrained_model.layers[pretrained_model.layers.index(last_conv_layer) + 1:]
]

In [None]:
# Rebuild the classifier head as its own model: it takes a feature map and applies the
# head layers of pretrained_model in order, reusing their weights.
# NOTE(review): the (8,8,2048) input shape is hard-coded; presumably it matches the
# output of `last_conv_layer` for 256x256 inputs - verify if IM_SIZE or the backbone changes.
classifier_input = Input(shape=(8,8,2048))
x = classifier_input
for layer_name in classifier_layer_names:
 x = pretrained_model.get_layer(layer_name)(x)
classifier_model = Model(classifier_input, x)

In [None]:
# Grad-CAM step 1: gradient of the top predicted class score with respect to the
# last convolutional feature map.
with tf.GradientTape() as tape:
  last_conv_layer_output = last_conv_layer_model(im)
  # Watch the feature map explicitly: it is an intermediate tensor, not a variable,
  # so the tape is not guaranteed to track it otherwise and the gradient could be None.
  tape.watch(last_conv_layer_output)
  preds = classifier_model(last_conv_layer_output)
  top_pred_index = tf.argmax(preds[0])
  print(top_pred_index)
  top_class_channel = preds[:, top_pred_index]

grads = tape.gradient(top_class_channel, last_conv_layer_output)

In [None]:
# Shape of the gradient tensor.
grads.shape

In [None]:
# Average the gradients over axes 0, 1 and 2, leaving one value per channel.
pooled_grads = tf.reduce_mean(grads, axis=(0,1,2)).numpy()

In [None]:
# Shape of the per-channel gradient weights.
pooled_grads.shape

In [None]:
# Grad-CAM step 2: drop the batch axis and scale every channel of the feature map by
# its pooled gradient. Broadcasting over the last axis covers all channels without a
# hard-coded channel count, and produces a new array instead of writing in place into
# the buffer returned by .numpy().
last_conv_layer_output = last_conv_layer_output.numpy()[0] * pooled_grads

In [None]:
# Shape of the gradient-weighted feature map.
last_conv_layer_output.shape

In [None]:
# Sum over the channel axis to get a single 2-D heatmap.
heatmap = np.sum(last_conv_layer_output, axis=-1)

In [None]:
# Keep only the positive contributions and display the heatmap.
heatmap=tf.nn.relu(heatmap)
plt.matshow(heatmap)

In [None]:
# Upscale the heatmap to 256x256 and overlay it on the first channel of the input image
# (heatmap scaled by 255, image scaled down by 255).
resized_heatmap=cv2.resize(np.array(heatmap),(256,256))
plt.matshow(resized_heatmap*255+im[0,:,:,0]/255)

In [None]:
# Ensemble: equal-weight average of the outputs of `model` and `pretrained_model` on the same input.
# NOTE(review): the name `input` shadows Python's built-in input().
input = Input(shape=(CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"], 3))

y_1 = model(input)
y_2 = pretrained_model(input)

output = 0.5*y_1 + 0.5*y_2

ensemble_model = Model(inputs=input, outputs=output)

In [None]:
# Compile the ensemble with Adam at LEARNING_RATE and the shared loss and metrics.
ensemble_model.compile(optimizer=Adam(learning_rate=CONFIGURATION["LEARNING_RATE"]),
                    loss=loss_function,
                    metrics=metrics)

In [None]:
# Vision-transformer demo: load one test image, resize it to IM_SIZE x IM_SIZE and add a batch axis.
test_image = cv2.imread("/content/dataset/Emotions Dataset/Emotions Dataset/test/happy/109046.jpg_brightness_1.jpg")
test_image = cv2.resize(test_image, (CONFIGURATION["IM_SIZE"], CONFIGURATION["IM_SIZE"]))

im = tf.expand_dims(test_image, axis=0)

# Cut the image into non-overlapping 16x16 patches (the stride equals the patch size).
patches = tf.image.extract_patches(
    images=im,
    sizes=[1, 16, 16, 1],
    strides=[1, 16, 16, 1],
    rates=[1, 1, 1, 1],
    padding='VALID',
)

In [None]:
# Flatten the patch grid into a sequence: 256 patches, each a vector of 768 values
# (16 * 16 * 3), printing the shape before and after.
print(patches.shape)
patches = tf.reshape(patches, (patches.shape[0],256, 768))
print(patches.shape)

In [None]:
# Draw every patch in a 16x16 grid, reshaping each flat vector back to a 16x16x3 image.
plt.figure(figsize=(8,8))

for i in range(patches.shape[1]):
    ax = plt.subplot(16,16, i+1)
    plt.imshow(tf.reshape(patches[0,i,:], (16, 16, 3)))
    plt.axis("off")

In [None]:
class PatchEncoder(Layer):
  """Split images into patches, project them linearly and add positional embeddings.

  Args:
    N_PATCHES: number of patches per image; must equal the number of
      non-overlapping PATCH_SIZE x PATCH_SIZE tiles in the input image.
    HIDDEN_SIZE: size of each patch embedding.
    PATCH_SIZE: side length of a patch in pixels (16 by default).
    name: layer name ('patch_encoder' by default).
    **kwargs: forwarded to the base Layer.
  """
  def __init__(self, N_PATCHES, HIDDEN_SIZE, PATCH_SIZE = 16, name = 'patch_encoder', **kwargs):
    super(PatchEncoder, self).__init__(name = name, **kwargs)

    self.linear_projection = Dense(HIDDEN_SIZE)
    self.positional_embedding = Embedding(N_PATCHES, HIDDEN_SIZE)
    self.N_PATCHES = N_PATCHES
    self.PATCH_SIZE = PATCH_SIZE

  def call(self, x):
    """Return patch embeddings with one row per patch and HIDDEN_SIZE columns, per image."""
    # Non-overlapping patches: the stride equals the patch size.
    patches = tf.image.extract_patches(
      images=x,
      sizes=[1, self.PATCH_SIZE, self.PATCH_SIZE, 1],
      strides=[1, self.PATCH_SIZE, self.PATCH_SIZE, 1],
      rates=[1, 1, 1, 1],
      padding='VALID',
      )
    # Flatten the patch grid into a sequence. The sequence length comes from
    # N_PATCHES so that it always matches the positional-embedding table.
    patches = tf.reshape(patches, (tf.shape(patches)[0], self.N_PATCHES, patches.shape[-1]))

    # One learned positional embedding per patch index, broadcast over the batch.
    embedding_input = tf.range(start=0, limit=self.N_PATCHES, delta=1)
    output = self.linear_projection(patches) + self.positional_embedding(embedding_input)

    return output

In [None]:
# Smoke test: encode a dummy batch of two 256x256 RGB images into 256 patches of size 768.
patch_enc = PatchEncoder(256, 768)
patch_enc(tf.zeros([2, 256, 256, 3]))

In [None]:
class TransformerEncoder(Layer):
  """One encoder block: layer-norm -> self-attention -> residual add, then
  layer-norm -> two GELU dense layers -> residual add.

  Args:
    N_HEADS: number of attention heads.
    HIDDEN_SIZE: width of the dense layers; also passed to MultiHeadAttention
      as its second positional argument.
    **kwargs: forwarded to the base Layer (e.g. `name`).
  """
  def __init__(self, N_HEADS, HIDDEN_SIZE, **kwargs):
    # No fixed name is passed: ViT stacks several of these blocks, and a shared
    # hard-coded name makes the instances indistinguishable.
    super(TransformerEncoder, self).__init__(**kwargs)

    self.layer_norm_1 = LayerNormalization()
    self.layer_norm_2 = LayerNormalization()

    self.multihead_att = MultiHeadAttention(N_HEADS, HIDDEN_SIZE)

    self.dense_1 = Dense(HIDDEN_SIZE, activation = tf.nn.gelu)
    self.dense_2 = Dense(HIDDEN_SIZE, activation = tf.nn.gelu)

    # Built once here rather than creating new Add layers on every call.
    self.add_1 = Add()
    self.add_2 = Add()

  def call(self, input):
    """Apply the encoder block to `input` and return a tensor of the same shape."""
    # Self-attention sub-block with a residual connection.
    x_1 = self.layer_norm_1(input)
    x_1 = self.multihead_att(x_1, x_1)
    x_1 = self.add_1([x_1, input])

    # Feed-forward sub-block with a residual connection.
    x_2 = self.layer_norm_2(x_1)
    x_2 = self.dense_1(x_2)
    output = self.dense_2(x_2)
    output = self.add_2([output, x_1])

    return output

In [None]:
# Smoke test: run one encoder block (8 heads, hidden size 768) on a dummy sequence of 256 tokens.
trans_enc = TransformerEncoder(8, 768)
trans_enc(tf.zeros([1, 256, 768]))

In [None]:
class ViT(Model):
  """Vision transformer classifier.

  Pipeline: PatchEncoder -> N_LAYERS TransformerEncoder blocks -> flatten ->
  two GELU dense layers -> softmax over CONFIGURATION["NUM_CLASSES"].

  Args:
    N_HEADS: attention heads per encoder block.
    HIDDEN_SIZE: patch embedding / encoder width.
    N_PATCHES: number of patches per image.
    N_LAYERS: number of stacked encoder blocks.
    N_DENSE_UNIT: width of the two dense layers in the classification head.
  """
  def __init__(self, N_HEADS, HIDDEN_SIZE, N_PATCHES, N_LAYERS, N_DENSE_UNIT):
    super(ViT, self).__init__(name = 'vision_transformer')

    self.N_LAYERS = N_LAYERS

    self.patch_encoder = PatchEncoder(N_PATCHES, HIDDEN_SIZE)
    self.transformer_encoder = [TransformerEncoder(N_HEADS, HIDDEN_SIZE) for _ in range(N_LAYERS)]

    # Built once here rather than creating a new Flatten layer on every call.
    self.flatten = Flatten()
    self.dense_1 = Dense(N_DENSE_UNIT, tf.nn.gelu)
    self.dense_2 = Dense(N_DENSE_UNIT, tf.nn.gelu)
    self.dense_3 = Dense(CONFIGURATION["NUM_CLASSES"], activation="softmax")

  def call(self, input, training = True):
    """Return softmax class probabilities for a batch of images.

    `training` is accepted for Keras compatibility; no layer used here reads it.
    """
    x = self.patch_encoder(input)

    for encoder in self.transformer_encoder:
      x = encoder(x)

    # All patch tokens are flattened into one vector per image for the head.
    x = self.flatten(x)
    x = self.dense_1(x)
    x = self.dense_2(x)

    return self.dense_3(x)


In [None]:
# Instantiate the ViT (8 heads, hidden size 768, 256 patches, 4 encoder blocks, 1024 dense
# units) and call it once on a dummy batch so that its weights are created.
vit = ViT(8, 768, 256, 4, 1024)
vit(tf.zeros([32, 256, 256, 3]))

In [None]:
# List the ViT layers and parameter counts.
vit.summary()

In [None]:
# Compile the ViT with Adam at LEARNING_RATE and the shared loss and metrics.
vit.compile(optimizer=Adam(learning_rate=CONFIGURATION["LEARNING_RATE"]),
                    loss=loss_function,
                    metrics=metrics)

In [None]:
# Train the ViT for N_EPOCHS.
# NOTE(review): `training_dataset` / `validation_dataset` are rebound to unbatched datasets by an
# earlier cell - confirm the data passed here is batched as intended.
vit.fit(training_dataset, validation_data=validation_dataset, epochs=CONFIGURATION["N_EPOCHS"], verbose=1)

In [None]:
# Install the Hugging Face transformers library used in the remaining cells.
!pip install transformers

In [None]:
from transformers import ViTConfig, ViTModel

# Create a ViT configuration with hidden_size overridden to 144; every other field keeps its default.
configuration = ViTConfig(hidden_size=144)

# Instantiate a model with random weights from that configuration.
# NOTE(review): this rebinds `model`, which until here referred to the trained Keras classifier.
model = ViTModel(configuration)

# Read the configuration back from the model.
configuration = model.config

In [None]:
# Display the resulting configuration.
configuration

In [None]:
# Preprocessing for the Hugging Face ViT: resize to 224x224, multiply pixels by 1/255,
# then reorder the non-batch axes to (channels, height, width).
# NOTE(review): confirm that scaling to [0, 1] matches the normalisation the pretrained
# checkpoint expects.
resize_rescale_hf = tf.keras.Sequential([
    Resizing(224, 224),
    Rescaling(1./255),
    Permute((3,1,2))
])

In [None]:
from transformers import AutoImageProcessor, TFViTModel

# Pretrained ViT loaded through Hugging Face, wrapped in a Keras functional model.
base_model = TFViTModel.from_pretrained("google/vit-base-patch16-224-in21k")
inputs = Input(shape=(256, 256, 3))

x = resize_rescale_hf(inputs)
x = base_model.vit(x)[0][:, 0, :] # keep only the token at sequence position 0 (the class embedding)

# Softmax classification head on top of that token.
output = Dense(CONFIGURATION["NUM_CLASSES"], activation = "softmax")(x)

hf_model = Model(inputs=inputs, outputs=output)