# Preparation for Colab

Make sure you're running a GPU runtime; if not, select "GPU" as the hardware accelerator in Runtime > Change Runtime Type in the menu. The next cells will install the `clip` package and its dependencies, and check if PyTorch 1.7.1 or later is installed.

In [35]:
# # ! python -m pip install --upgrade pip
# ! pip install ftfy regex tqdm
# # ! pip install git+https://github.com/openai/CLIP.git

# ! pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# # ! pip install --upgrade --force-reinstall torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1
# # (use --upgrade --force-reinstall to upgrade)

# ! pip install scikit-image
# ! pip install matplotlib
# ! pip install opencv-python
# ! pip install setuptools
# # ! pip install --upgrade --force-reinstall transformers==4.36.0
# ! pip install transformers
# ! pip install tensorflow
# ! pip install tf-keras
! pip install scikit-learn
# ! pip show transformers



Collecting scikit-learn
  Downloading scikit_learn-1.5.1-cp39-cp39-win_amd64.whl.metadata (12 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.1-cp39-cp39-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   ---------------------------------------- 11.0/11.0 MB 68.8 MB/s eta 0:00:00
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.5.1 threadpoolctl-3.5.0


In [36]:
import os
import skimage
import IPython.display
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import json

from collections import OrderedDict
import torch
import torch.nn as nn

import tensorflow as tf

from sklearn.model_selection import train_test_split

from pkg_resources import packaging

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [3]:
import numpy as np
import torch
from pkg_resources import packaging

print("Torch version:", torch.__version__)
print(torch. cuda. is_available())
print("CUDA version:", torch.version.cuda)

Torch version: 2.4.0+cu118
True
CUDA version: 11.8


# Loading the model

In [4]:
from transformers import RobertaTokenizerFast, TFRobertaForSequenceClassification, pipeline

tokenizer = RobertaTokenizerFast.from_pretrained("arpanghoshal/EmoRoBERTa")
model = TFRobertaForSequenceClassification.from_pretrained("arpanghoshal/EmoRoBERTa")

  from .autonotebook import tqdm as notebook_tqdm










All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at arpanghoshal/EmoRoBERTa.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.


### Predict using pipeline

In [31]:
emotion = pipeline('sentiment-analysis', 
                    model='arpanghoshal/EmoRoBERTa')

emotion_labels = emotion("Thanks for using it.")
print(emotion_labels)

401 Client Error. (Request ID: Root=1-66c2789a-111f5628716a9b8a142cb734;8186a7b4-e90f-4ee0-bb4a-8c4e3a60bd48)

Cannot access gated repo for url https://huggingface.co/arpanghoshal/EmoRoBERTa/resolve/main/tf_model.h5.
Access to model arpanghoshal/EmoRoBERTa is restricted. You must be authenticated to access it.
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at arpanghoshal/EmoRoBERTa.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'label': 'gratitude', 'score': 0.9964383840560913}]


### Predict locally

In [42]:
def predict_emotion(text):
    inputs = tokenizer(text, return_tensors="pt")
    # convert inputs to tensorflow
    inputs = {name: tf.convert_to_tensor(tensor) for name, tensor in inputs.items()}

    with torch.no_grad():
        logits = model(inputs).logits

    # torch
    # predicted_class_id = logits.argmax().item()
    # model.config.id2label[predicted_class_id]

    # tf
    predicted_class_id = tf.argmax(logits, axis=1).numpy()[0]
    model.config.id2label[predicted_class_id]

    return emotion_labels



text = "Hello, my dog is cute"
predict_emotion(text)

[{'label': 'gratitude', 'score': 0.9964383840560913}]

# Prepare data

In [37]:
with open('data/Subtask_2_train.json') as f:
    text_data_list = json.load(f)
    # text data is a map of conversation id to conversation
    text_data = {text['conversation_ID']: text for text in text_data_list}

# list of video filenames
texts = []
video_fnames_list = []
n_utts = 0
labels = []

for c_id, conv in text_data.items():
    n_utts += len(conv['conversation'])
    for utt in conv['conversation']:
        # u_id = utt['utterance_ID']
        texts.append(utt['text'])
        labels.append(utt['emotion'])
        video_fnames_list.append(utt['video_name'])

print(len(video_fnames_list), n_utts)

13619 13619


In [38]:
print(texts[:3])
print(labels[:3])

['Alright , so I am back in high school , I am standing in the middle of the cafeteria , and I realize I am totally naked .', 'Oh , yeah . Had that dream .', 'Then I look down , and I realize there is a phone ... there .']
['neutral', 'neutral', 'surprise']


In [39]:
### Old split method - splitting features directly

# X_train, X_test, Y_train, Y_test = train_test_split(X.cpu().numpy(), Y.cpu().numpy(), test_size=0.2, random_state=42)

# X_train = torch.tensor(X_train).cuda()
# X_test = torch.tensor(X_test).cuda()
# Y_train = torch.tensor(Y_train).cuda()
# Y_test = torch.tensor(Y_test).cuda()

# print(X_train.shape, Y_train.shape)
# print(X_test.shape, Y_test.shape)

# class_counts = Y_train.sum(dim=0)
# class_counts

###################################################

### New split method - splitting indices

train_indices, test_indices = train_test_split(range(len(video_fnames_list)), test_size=0.2, random_state=42)

# lengths
print(len(train_indices), len(test_indices))

# create a list of video filenames, X_train, X_test, Y_train, Y_test

train_video_fnames = [video_fnames_list[i] for i in train_indices]
test_video_fnames = [video_fnames_list[i] for i in test_indices]

# split texts and labels by indices
train_texts = [texts[i] for i in train_indices]
test_texts = [texts[i] for i in test_indices]

train_labels = [labels[i] for i in train_indices]
test_labels = [labels[i] for i in test_indices]

10895 2724


# Classify
## Zero-shot classification

In [44]:
test_predicted_labels = []

for i, text in enumerate(test_texts):
    prediction = predict_emotion(text)
    predicted_emotion = prediction[0]['label']
    test_predicted_labels.append(predicted_emotion)

print(test_predicted_labels[:5])

KeyboardInterrupt: 

In [None]:
print(set(test_predicted_labels))
print(set(test_labels))

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
plt.rc('figure', figsize=(9, 5))

labels_ordered = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']

cm = confusion_matrix(test_labels, test_predicted_labels, labels=labels_ordered)
plt.figure()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels_ordered, yticklabels=labels_ordered)
plt.savefig(f'./experiments/plots/EkmanClassifier/confusion_matrix.png')
plt.show()

## Fine-tuning

In [None]:
# encode
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

# convert to tensors
train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    train_labels
))

test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings),
    test_labels
))

# shuffle and batch
train_dataset = train_dataset.shuffle(100).batch(16)
test_dataset = test_dataset.batch(16)

loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# train
model.compile(optimizer='adam',
                loss=loss,
                metrics=['accuracy'])
model.fit(train_dataset, epochs=2)

# evaluate
model.evaluate(test_dataset)