# Testing the Vision Transformer (ViT) models on the benchmarks
This notebook contains:

* The code to test the trained Keras ViT models (training is done on the ISTI VM) on the "Twitter Testing Dataset I" benchmark and on the other benchmark datasets described in the documentation.



In [None]:
# Environment setup: pin TensorFlow to 2.8 and install the cuDNN 8.1 build for CUDA 11.2 next to it.
!pip install tensorflow==2.8 --quiet
!apt install --allow-change-held-packages libcudnn8=8.1.0.77-1+cuda11.2 --quiet
# NOTE(review): tensorflow_addons and vit_keras are installed unpinned; consider pinning
# versions known to work with TensorFlow 2.8 so that a fresh run stays reproducible.
!pip install tensorflow_addons --quiet
!pip install vit_keras --quiet

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa

from vit_keras import vit
from tqdm.auto import tqdm
from tensorflow import keras
from tensorflow.keras import layers
from IPython.display import display
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import StratifiedKFold

Mounted at /content/drive


In [None]:
# Root of the project on the mounted Drive; every other path is derived from it.
BASE_DIR = "/content/drive/MyDrive/Thesis/"
PREDICTION_DIR = f"{BASE_DIR}predictions/"
BENCHMARK_DIR = f"{BASE_DIR}dataset/benchmark/"

# One folder per benchmark dataset.
TWITTER_1_DIR = f"{BENCHMARK_DIR}Twitter1269/"
TWITTER_2_DIR = f"{BENCHMARK_DIR}AMT_Twitter/"
EMOTION_ROI_DIR = f"{BENCHMARK_DIR}EmotionROI/"
FI_DIR = f"{BENCHMARK_DIR}emotion_dataset/"

# Model folders, one per version of the t4sa_2.0 dataset.
VERSION_BASE = f"{BASE_DIR}dataset/t4sa_2.0/"
VERSION_1_DIR = f"{VERSION_BASE}version_1/models/"
VERSION_2_DIR = f"{VERSION_BASE}version_2/models/"
VERSION_3_DIR = f"{VERSION_BASE}version_3/models/"
VERSION_4_DIR = f"{VERSION_BASE}version_4/models/"
VERSION_5_DIR = f"{VERSION_BASE}version_5/models/"
VERSION_6_DIR = f"{VERSION_BASE}version_6/models/"
VERSION_6_1_DIR = f"{VERSION_BASE}version_6_1/models/"
VERSION_6_2_DIR = f"{VERSION_BASE}version_6_2/models/"

# ViT variant name -> side (in pixels) of the square input image.
image_size_mapper = {
    "b16": 384,
    "b32": 224,
    "b32_384": 384,
    "l16": 384,
    "l32": 384,
}

# Testing the Keras ViT with the "Twitter Testing Dataset I"

In [None]:
# Utilities
# Lookup table: annotation CSV path -> folder that the image paths of that CSV are relative to.
map_dataset_to_folder = {
    # The three Twitter1269 CSVs all point to the same image folder
    **{TWITTER_1_DIR + csv_name: TWITTER_1_DIR + "twitter1/"
       for csv_name in ("3agree.csv", "4agree.csv", "5agree.csv")},
    TWITTER_2_DIR + "twitter_testing_2.csv": TWITTER_2_DIR + "imgs/",
    FI_DIR + "FI.csv": FI_DIR,
    # The three EmotionROI CSVs all point to the same image folder
    **{EMOTION_ROI_DIR + csv_name: EMOTION_ROI_DIR + "images/"
       for csv_name in ("emotion_ROI_test.csv", "emotion_ROI_train.csv", "emotion_ROI_complete.csv")},
}


def load_image_tf(path, vit_model, dataset_name):
  '''Reads one image from disk and prepares it as input for the chosen ViT variant.

  Args:
    path: image path, appended to the folder registered for `dataset_name`
      in `map_dataset_to_folder`.
    vit_model: key of `image_size_mapper`; selects the square target size.
    dataset_name: key of `map_dataset_to_folder`.

  Returns:
    The decoded 3-channel image resized to the model resolution. For the
    "b32", "l16" and "l32" variants it is additionally passed through
    `vit.preprocess_inputs`.
  '''
  raw_bytes = tf.io.read_file(map_dataset_to_folder[dataset_name] + path)
  # Always decode to 3 channels; animations (e.g. GIFs) are not expanded into all their frames
  decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)

  side = image_size_mapper[vit_model]
  resized = tf.image.resize(decoded, (side, side))

  needs_vit_preprocessing = vit_model in ("b32", "l16", "l32")
  return vit.preprocess_inputs(resized) if needs_vit_preprocessing else resized


def get_dataset(annot, vit_model, dataset_name=TWITTER_1_DIR + "3agree.csv", batch_size=32):
  '''Builds a batched tf.data.Dataset of (image, label) pairs from an annotation dataframe.

  Args:
    annot: dataframe providing a 'path' column and a 'class' column.
    vit_model: ViT variant name, forwarded to `load_image_tf`.
    dataset_name: benchmark CSV path, forwarded to `load_image_tf`.
    batch_size: number of examples per batch.

  Returns:
    A tf.data.Dataset yielding batches in the same order as the rows of `annot`.
  '''
  image_paths = annot['path'].to_list()
  # Class 0 keeps index 0; every other class is sent to index 2
  targets = [2 if label != 0 else label for label in annot['class'].apply(int).to_list()]

  def _to_example(image_path, target):
    # path -> preprocessed image; the label goes through untouched
    return load_image_tf(image_path, vit_model, dataset_name), target

  dataset = tf.data.Dataset.from_tensor_slices((image_paths, targets))
  dataset = dataset.map(
      _to_example,
      num_parallel_calls=tf.data.AUTOTUNE,  # decode images in parallel
      deterministic=True  # but preserve the row order
  )

  return dataset.batch(batch_size)


def get_model_architecture(vit_model):
  '''Builds the (untrained-head) classifier for the requested ViT variant.

  The model is a frozen, ImageNet-pretrained ViT backbone without its top,
  followed by a Dense(32, GELU) layer and a 3-way softmax layer.

  Args:
    vit_model: name of the backbone, one of "l16", "b16", "l32", "b32".
      It must also be a key of `image_size_mapper`.

  Returns:
    A tf.keras.Sequential model named 'vision_transformer'.

  Raises:
    ValueError: if `vit_model` is not one of the supported names.
  '''
  # The key is "b32" (not "b_32") so that it matches `image_size_mapper`
  # and the names used by the rest of the notebook.
  backbone_builders = {
      "l16": vit.vit_l16,
      "b16": vit.vit_b16,
      "l32": vit.vit_l32,
      "b32": vit.vit_b32,
  }

  if vit_model not in backbone_builders:
    raise ValueError(
        "Unsupported ViT model '{}'; expected one of {}".format(vit_model, sorted(backbone_builders)))

  # Load the base ViT model
  backbone = backbone_builders[vit_model](
      image_size = image_size_mapper[vit_model],
      pretrained = True,
      include_top = False,
      pretrained_top = False
  )

  # Only the classification head carries task-specific weights
  backbone.trainable = False

  # Add the classification head
  model = tf.keras.Sequential([
          backbone,
          layers.Dense(32, activation = tfa.activations.gelu),
          layers.Dense(3, activation ='softmax')
      ],
      name = 'vision_transformer')

  return model


def predict_model(model_name, dataset_name=TWITTER_1_DIR + "3agree.csv", vit_model="b32"):
  '''Evaluates saved ViT weights on a benchmark and returns the binary accuracy.

  Args:
    model_name: path of the weights file to load into the model.
    dataset_name: path of the benchmark CSV holding the gold labels ('class' column).
    vit_model: name of the ViT variant the weights belong to.

  Returns:
    The accuracy obtained on the benchmark after discarding the Neutral class.
  '''
  classifier = get_model_architecture(vit_model)  # architecture only, weights come later

  annotations = pd.read_csv(dataset_name)  # dataframe with the gold labels
  benchmark_data = get_dataset(annotations, vit_model, dataset_name)

  classifier.load_weights(model_name)

  # Only the "b32" model gets a rescaling layer in front, since it has been
  # trained with pixel values in range [0, 1]
  if vit_model != "b32":
    inference_model = classifier
  else:
    inference_model = tf.keras.Sequential(
        [layers.Rescaling(1.0/255), classifier],
        name="complete_model")

  class_probs = inference_model.predict(benchmark_data)

  # The benchmark is a binary problem: drop column 1 (Neutral) before taking the argmax
  binary_probs = np.delete(class_probs, 1, axis=1)
  predicted = binary_probs.argmax(axis=1).tolist()

  expected = annotations['class'].apply(int).tolist()

  accuracy = accuracy_score(predicted, expected)
  clear_output(wait=True)  # wipe the prediction progress output

  return accuracy
