# **Emotion Classifier**



## 1. Libraries

In [1]:
# Installing libraries

!pip install datasets
!pip install transformers
!pip install nltk emoji==0.6.0
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.4.0-py3-none-any.whl (365 kB)
[K     |████████████████████████████████| 365 kB 4.2 MB/s 
Collecting xxhash
  Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[K     |████████████████████████████████| 212 kB 85.5 MB/s 
[?25hCollecting fsspec[http]>=2021.11.1
  Downloading fsspec-2022.7.1-py3-none-any.whl (141 kB)
[K     |████████████████████████████████| 141 kB 87.5 MB/s 
[?25hCollecting huggingface-hub<1.0.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 15.1 MB/s 
Collecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting multiprocess
  Downloading multiprocess-0.70.13-py37-none-any.whl (115 kB)
[K     |████████████████████████████████| 115 kB 57.6 MB/s 
Collecting pyyaml>=5.1
  Downloading

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm

from datasets import load_dataset
from sklearn import metrics
from torch.utils.data import DataLoader, Dataset
import tensorflow as tf
import tensorflow_addons as tfa

import transformers
from transformers import BertTokenizer, AutoTokenizer
from keras.preprocessing.sequence import pad_sequences

## Dataset

In [5]:
# Loading dataset splits
semeval = load_dataset('sem_eval_2018_task_1', 'subtask5.english')
data = semeval.data

label_cols = ['anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust']

train_set = data["train"].to_pandas()
val_set = data["validation"].to_pandas()
test_set = data["test"].to_pandas()

train_set.drop('ID', inplace=True, axis=1)
val_set.drop('ID', inplace=True, axis=1)
test_set.drop('ID', inplace=True, axis=1)

frames = [train_set, val_set]
train_set = pd.concat(frames)
train_set = train_set.reset_index(drop=True)


print(train_set.shape, val_set.shape, test_set.shape)


test_set.head()

Downloading builder script:   0%|          | 0.00/1.95k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.22k [00:00<?, ?B/s]

Downloading and preparing dataset sem_eval_2018_task_1/subtask5.english (download: 5.70 MiB, generated: 1.24 MiB, post-processed: Unknown size, total: 6.94 MiB) to /root/.cache/huggingface/datasets/sem_eval_2018_task_1/subtask5.english/1.1.0/a7c0de8b805f1988b118882fb289ccfbbeb9085c7820b6f046b5887e234af182...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/5.98M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/6838 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3259 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/886 [00:00<?, ? examples/s]

Dataset sem_eval_2018_task_1 downloaded and prepared to /root/.cache/huggingface/datasets/sem_eval_2018_task_1/subtask5.english/1.1.0/a7c0de8b805f1988b118882fb289ccfbbeb9085c7820b6f046b5887e234af182. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

(7724, 12) (886, 12) (3259, 12)


Unnamed: 0,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
0,@Adnan__786__ @AsYouNotWish Dont worry Indian ...,True,True,False,False,False,False,True,False,False,False,True
1,"Academy of Sciences, eschews the normally sobe...",False,False,True,False,False,False,False,False,False,False,False
2,I blew that opportunity -__- #mad,True,False,True,False,False,False,False,False,True,False,False
3,This time in 2 weeks I will be 30... 😥,False,False,False,False,True,False,False,False,True,False,False
4,#Deppression is real. Partners w/ #depressed p...,False,False,False,True,False,False,False,False,True,False,False


In [6]:
# Use with BERTweet only for preprocessing

from emoji import demojize
from nltk.tokenize import TweetTokenizer


tokenizer = TweetTokenizer()


def normalizeToken(token):
    lowercased_token = token.lower()
    if token.startswith("@"):
        return "@USER"
    elif lowercased_token.startswith("http") or lowercased_token.startswith("www"):
        return "HTTPURL"
    elif len(token) == 1:
        return demojize(token)
    else:
        if token == "’":
            return "'"
        elif token == "…":
            return "..."
        else:
            return token


def normalizeTweet(tweet):
    tokens = tokenizer.tokenize(tweet.replace("’", "'").replace("…", "..."))
    normTweet = " ".join([normalizeToken(token) for token in tokens])

    normTweet = (
        normTweet.replace("cannot ", "can not ")
        .replace("n't ", " n't ")
        .replace("n 't ", " n't ")
        .replace("ca n't", "can't")
        .replace("ai n't", "ain't")
    )
    normTweet = (
        normTweet.replace("'m ", " 'm ")
        .replace("'re ", " 're ")
        .replace("'s ", " 's ")
        .replace("'ll ", " 'll ")
        .replace("'d ", " 'd ")
        .replace("'ve ", " 've ")
    )
    normTweet = (
        normTweet.replace(" p . m .", "  p.m.")
        .replace(" p . m ", " p.m ")
        .replace(" a . m .", " a.m.")
        .replace(" a . m ", " a.m ")
    )

    return " ".join(normTweet.split())

In [7]:
# Normalise dataset splits for BERTweet
def normalise_dataset(dataset):
  for entry in dataset['Tweet']:
    dataset['Tweet'] = dataset['Tweet'].replace(entry, normalizeTweet(entry))


normalise_dataset(train_set)
normalise_dataset(val_set)
normalise_dataset(test_set)

In [7]:
train_set.head()

Unnamed: 0,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
0,“ Worry is a down payment on a problem you may...,False,True,False,False,False,False,True,False,False,False,True
1,Whatever you decide to do make sure it makes y...,False,False,False,False,True,True,True,False,False,False,False
2,@USER it also helps that the majority of NFL c...,True,False,True,False,True,False,True,False,False,False,False
3,Accept the challenges so that you can literall...,False,False,False,False,True,False,True,False,False,False,False
4,My roommate : it 's okay that we can't spell b...,True,False,True,False,False,False,False,False,False,False,False


In [8]:
tf.random.set_seed(1234) # Set random seed


# uncomment desired model to use
#bert_model_name = "bert-base-uncased"
#bert_model_name = "bert-large-uncased"
#bert_model_name = "vinai/bertweet-base"
bert_model_name = "vinai/bertweet-large"

tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
MAX_LEN = 64


# Create tokenized inputs and attention masks for the padded inputs
def tokenize_sentences(sentences, tokenizer, max_seq_len = 64):
    tokenized_sentences = []

    for sentence in tqdm(sentences):
        tokenized_sentence = tokenizer.encode(sentence,add_special_tokens = True,max_length = max_seq_len)
        
        tokenized_sentences.append(tokenized_sentence)

    return tokenized_sentences

def create_attention_masks(tokenized_and_padded_sentences):
    attention_masks = []

    for sentence in tokenized_and_padded_sentences:
        att_mask = [int(token_id > 0) for token_id in sentence]
        attention_masks.append(att_mask)

    return np.asarray(attention_masks)

train_input_ids = tokenize_sentences(train_set['Tweet'], tokenizer, MAX_LEN)
train_inputs = pad_sequences(train_input_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
train_masks = create_attention_masks(train_inputs)

val_input_ids = tokenize_sentences(val_set['Tweet'], tokenizer, MAX_LEN)
val_inputs = pad_sequences(val_input_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
val_masks = create_attention_masks(val_inputs)

test_input_ids = tokenize_sentences(test_set['Tweet'], tokenizer, MAX_LEN)
test_inputs = pad_sequences(test_input_ids, maxlen=MAX_LEN, dtype="long", value=0, truncating="post", padding="post")
test_masks = create_attention_masks(test_inputs)

train_labels =  train_set[label_cols].values
val_labels = val_set[label_cols]
test_labels = test_set[label_cols].values

Downloading config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

  0%|          | 0/7724 [00:00<?, ?it/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


  0%|          | 0/886 [00:00<?, ?it/s]

  0%|          | 0/3259 [00:00<?, ?it/s]

In [9]:
print(test_input_ids[3])
print(tokenizer.decode(test_input_ids[3]))

[0, 713, 86, 11, 132, 688, 38, 40, 28, 389, 1666, 4832, 29, 625, 1215, 4297, 1215, 5982, 36024, 1215, 9021, 35, 2]
<s>This time in 2 weeks I will be 30... :sad_but_relieved_face:</s>


In [9]:
# Batch, shuffle and repeat datasets

BATCH_SIZE = 16
NR_EPOCHS = 3

def create_dataset(data_tuple, epochs=1, batch_size=32, buffer_size=10000, train=True):
    dataset = tf.data.Dataset.from_tensor_slices(data_tuple)
    if train:
        dataset = dataset.shuffle(buffer_size=buffer_size)
    dataset = dataset.repeat(epochs)
    dataset = dataset.batch(batch_size)
    if train:
        dataset = dataset.prefetch(1)
    
    return dataset

train_dataset = create_dataset((train_inputs, train_masks, train_labels), epochs=NR_EPOCHS, batch_size=BATCH_SIZE)
validation_dataset = create_dataset((val_inputs, val_masks, val_labels), epochs=NR_EPOCHS, batch_size=BATCH_SIZE)
test_dataset = create_dataset((test_inputs, test_masks), epochs=1, batch_size=BATCH_SIZE, train=False)

## 3. BERT model


In [13]:
from transformers import TFBertModel, TFAutoModel, AutoTokenizer, TFRobertaModel


class AutoClassifier(tf.keras.Model):
      def __init__(self, bert: TFRobertaModel, num_classes: int):
        super().__init__()
        self.bert = bert
        self.classifier = tf.keras.layers.Dense(num_classes, activation='sigmoid')
        self.pre_classifier = tf.keras.layers.Dense(2048, activation='relu')

      @tf.function
      def call(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
        outputs = self.bert(input_ids,
                               attention_mask=attention_mask,
                               token_type_ids=token_type_ids,
                               position_ids=position_ids,
                               head_mask=head_mask)


        cls_output = outputs[1]
        cls_output = self.pre_classifier(cls_output)
        cls_output = self.classifier(cls_output)
                
        return cls_output


class BertClassifier(tf.keras.Model):    
    def __init__(self, bert: TFBertModel, num_classes: int):
        super().__init__()

        self.bert = bert
        self.classifier = tf.keras.layers.Dense(num_classes, activation='sigmoid')
        
    @tf.function
    def call(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
        outputs = self.bert(input_ids,
                               attention_mask=attention_mask,
                               token_type_ids=token_type_ids,
                               position_ids=position_ids,
                               head_mask=head_mask)
        cls_output = outputs[1]
        cls_output = self.classifier(cls_output)
                
        return cls_output

print(len(label_cols))

model = AutoClassifier(TFAutoModel.from_pretrained(bert_model_name), len(label_cols)) # use with BERTweet
#model = BertClassifier(TFBertModel.from_pretrained(bert_model_name), len(label_cols)) # use with BERT

11


Some layers from the model checkpoint at vinai/bertweet-large were not used when initializing TFRobertaModel: ['lm_head']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFRobertaModel were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['roberta/pooler/dense/bias:0', 'roberta/pooler/dense/kernel:0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
print(train_inputs[:1])

model(train_inputs[:1],train_masks[:1]).numpy()
model.summary()

[[    0    17    48   305 17649    16    10   159  3207    15    10   936
     47   189   393    33   128   479 12181 11392   479   849 25331 38591
    849 23240  4128   849   605 17649     2     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0]]
(1, 1024)
(1, 64, 1024)
(1, 1024)
(1, 64, 1024)
Model: "auto_classifier_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 tf_roberta_model_1 (TFRober  multiple                 355359744 
 taModel)                                                        
                                                                 
 dense_2 (Dense)             multiple                  22539     
                                                                 
 dense_3 (Dense)             multiple                  2099200   

## 4. Training

In [15]:
import time

INIT_LR = 2e-7
MAX_LR = 2e-5

# Loss Function
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
train_loss = tf.keras.metrics.Mean(name='train_loss')
validation_loss = tf.keras.metrics.Mean(name='test_loss')



# Optimizer
steps_per_epoch = len(train_dataset) // BATCH_SIZE


clr = tfa.optimizers.CyclicalLearningRate(initial_learning_rate=INIT_LR,
    maximal_learning_rate=MAX_LR,
    scale_fn=lambda x: 1/(2.**(x-1)),
    step_size=4 * steps_per_epoch
)


optimizer = tf.keras.optimizers.Adam(clr)


# Metrics

train_auc_metrics = [tf.keras.metrics.AUC() for i in range(len(label_cols))]
validation_auc_metrics = [tf.keras.metrics.AUC() for i in range(len(label_cols))]
train_loss_results = []
val_loss_results = []

# Train step

@tf.function
def train_step(model, token_ids, masks, labels):
    with tf.GradientTape() as tape:
        predictions = model(token_ids, attention_mask = masks, training=True)
        loss_value = loss_fn(labels, predictions)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    train_loss(loss_value)

    for i, auc in enumerate(train_auc_metrics):
        auc.update_state(labels[:,i], predictions[:,i])


# Validate step

@tf.function
def validation_step(model, token_ids, masks, labels):
    predictions = model(token_ids, attention_mask=masks, training=False)
    v_loss = loss_fn(labels, predictions)
    validation_loss(v_loss)

    for i, auc in enumerate(validation_auc_metrics):
        auc.update_state(labels[:,i], predictions[:,i])

# Train loop


def train(model, train_dataset, val_dataset, epochs):
  for epoch in range(epochs):

    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    for step, (token_ids, masks, labels) in enumerate(tqdm(train_dataset)):

      train_step(model, token_ids, masks, labels)
      if step % 100 == 0:
        train_loss_results.append(train_loss.result())
        print(f'\nTrain Step: {step}, Loss: {train_loss.result()}')
        for i, label_name in enumerate(label_cols):
          print(f"{label_name} roc_auc {train_auc_metrics[i].result()}")
          train_auc_metrics[i].reset_states()

    for i, (token_ids, masks, labels) in enumerate(tqdm(val_dataset)):
            validation_step(model, token_ids, masks, labels)

    # Display metrics at the end of each epoch.
    print(f'\nEpoch {epoch+1}, Validation Loss: {validation_loss.result()}, Time: {time.time()-start_time}\n')
    
    for i, label_name in enumerate(label_cols):
      print(f"{label_name} roc_auc {validation_auc_metrics[i].result()}")
      validation_auc_metrics[i].reset_states()
    val_loss_results.append(validation_loss.result())

    print('\n')

    model.save_weights("/content/drive/MyDrive/Models/model{}.h5".format(epoch+1))

train(model, train_dataset, validation_dataset, epochs=3)


Start of epoch 0


  0%|          | 0/1449 [00:00<?, ?it/s]

(16, 1024)
(16, 64, 1024)

Train Step: 0, Loss: 0.6801455020904541
anger roc_auc 0.60317462682724
anticipation roc_auc 0.9333333373069763
disgust roc_auc 0.5666666626930237
fear roc_auc 0.375
joy roc_auc 0.6090909242630005
love roc_auc 0.42307692766189575
optimism roc_auc 0.2499999850988388
pessimism roc_auc 0.4333333373069763
sadness roc_auc 0.5714285969734192
surprise roc_auc 0.0
trust roc_auc 0.0

Train Step: 100, Loss: 0.5744041800498962
anger roc_auc 0.49402981996536255
anticipation roc_auc 0.4739181101322174
disgust roc_auc 0.5120114684104919
fear roc_auc 0.4979463219642639
joy roc_auc 0.4899066090583801
love roc_auc 0.4356119930744171
optimism roc_auc 0.4800013303756714
pessimism roc_auc 0.47893020510673523
sadness roc_auc 0.5332367420196533
surprise roc_auc 0.522722601890564
trust roc_auc 0.3843015730381012

Train Step: 200, Loss: 0.5247601866722107
anger roc_auc 0.5442703366279602
anticipation roc_auc 0.47971174120903015
disgust roc_auc 0.5534433126449585
fear roc_auc 0.489053

  0%|          | 0/167 [00:00<?, ?it/s]

(2, 1024)
(2, 64, 1024)

Epoch 1, Validation Loss: 0.23043091595172882, Time: 1110.9833052158356

anger roc_auc 0.9563015699386597
anticipation roc_auc 0.8329300284385681
disgust roc_auc 0.9480818510055542
fear roc_auc 0.9770610928535461
joy roc_auc 0.9722254276275635
love roc_auc 0.9456188678741455
optimism roc_auc 0.9401110410690308
pessimism roc_auc 0.8719111680984497
sadness roc_auc 0.9229841828346252
surprise roc_auc 0.898571252822876
trust roc_auc 0.8618782162666321



Start of epoch 1


  0%|          | 0/1449 [00:00<?, ?it/s]


Train Step: 0, Loss: 0.32813507318496704
anger roc_auc 0.9353065490722656
anticipation roc_auc 0.8087064027786255
disgust roc_auc 0.892758309841156
fear roc_auc 0.9403702020645142
joy roc_auc 0.9507935047149658
love roc_auc 0.9286928176879883
optimism roc_auc 0.9220690131187439
pessimism roc_auc 0.8437144756317139
sadness roc_auc 0.8779319524765015
surprise roc_auc 0.879628598690033
trust roc_auc 0.904224157333374

Train Step: 100, Loss: 0.3219512403011322
anger roc_auc 0.9618391990661621
anticipation roc_auc 0.8500611186027527
disgust roc_auc 0.9348381757736206
fear roc_auc 0.9603618383407593
joy roc_auc 0.9651677012443542
love roc_auc 0.9456577897071838
optimism roc_auc 0.9377575516700745
pessimism roc_auc 0.8629046082496643
sadness roc_auc 0.9133340716362
surprise roc_auc 0.9054450988769531
trust roc_auc 0.8602060675621033

Train Step: 200, Loss: 0.3172791600227356
anger roc_auc 0.9515949487686157
anticipation roc_auc 0.8038421273231506
disgust roc_auc 0.9336362481117249
fear roc_a

  0%|          | 0/167 [00:00<?, ?it/s]


Epoch 2, Validation Loss: 0.2176194041967392, Time: 1044.0269854068756

anger roc_auc 0.969162106513977
anticipation roc_auc 0.8656231164932251
disgust roc_auc 0.9617990255355835
fear roc_auc 0.9843905568122864
joy roc_auc 0.9812222123146057
love roc_auc 0.9574999809265137
optimism roc_auc 0.9518638253211975
pessimism roc_auc 0.8889425992965698
sadness roc_auc 0.938092052936554
surprise roc_auc 0.9077406525611877
trust roc_auc 0.9025336503982544



Start of epoch 2


  0%|          | 0/1449 [00:00<?, ?it/s]


Train Step: 0, Loss: 0.2793531119823456
anger roc_auc 0.9564831256866455
anticipation roc_auc 0.8691880106925964
disgust roc_auc 0.9327391386032104
fear roc_auc 0.9378474950790405
joy roc_auc 0.9687138199806213
love roc_auc 0.9671627879142761
optimism roc_auc 0.9487375617027283
pessimism roc_auc 0.8922593593597412
sadness roc_auc 0.9014623761177063
surprise roc_auc 0.9123735427856445
trust roc_auc 0.8998816609382629

Train Step: 100, Loss: 0.2773434519767761
anger roc_auc 0.9703143835067749
anticipation roc_auc 0.8606038093566895
disgust roc_auc 0.9432589411735535
fear roc_auc 0.968048095703125
joy roc_auc 0.9740025997161865
love roc_auc 0.9602994322776794
optimism roc_auc 0.9391165375709534
pessimism roc_auc 0.8735160231590271
sadness roc_auc 0.9157474637031555
surprise roc_auc 0.9078453779220581
trust roc_auc 0.9014480113983154

Train Step: 200, Loss: 0.2753584086894989
anger roc_auc 0.9690783619880676
anticipation roc_auc 0.8579285740852356
disgust roc_auc 0.9495629668235779
fear r

  0%|          | 0/167 [00:00<?, ?it/s]


Epoch 3, Validation Loss: 0.21013018488883972, Time: 1045.3469138145447

anger roc_auc 0.9738008379936218
anticipation roc_auc 0.8725335001945496
disgust roc_auc 0.9611249566078186
fear roc_auc 0.984588086605072
joy roc_auc 0.9844741225242615
love roc_auc 0.9601401686668396
optimism roc_auc 0.9555678367614746
pessimism roc_auc 0.9064454436302185
sadness roc_auc 0.9441261887550354
surprise roc_auc 0.9385354518890381
trust roc_auc 0.9067897796630859




In [110]:
model.save_weights("/content/drive/MyDrive/SemEval/multi_emotion_classification_cls.h5")

## 5. Evaluate

In [37]:
df_result = test_set.copy(deep=True)
df_result = df_result.drop_duplicates('Tweet', keep='first')

df_result[label_cols] = 0.5
df_result.set_index('Tweet',inplace=True)

df_result.head()
test_set.head(10000)

Unnamed: 0,Tweet,anger,anticipation,disgust,fear,joy,love,optimism,pessimism,sadness,surprise,trust
0,@USER @USER Dont worry Indian army is on its w...,True,True,False,False,False,False,True,False,False,False,True
1,"Academy of Sciences , eschews the normally sob...",False,False,True,False,False,False,False,False,False,False,False
2,I blew that opportunity - __ - #mad,True,False,True,False,False,False,False,False,True,False,False
3,This time in 2 weeks I will be 30 ... :sad_but...,False,False,False,False,True,False,False,False,True,False,False
4,#Deppression is real . Partners w / #depressed...,False,False,False,True,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
3254,shaft abrasions from panties merely shifted to...,True,False,False,False,False,False,False,True,False,False,False
3255,@USER heard of Remothered ? Indie horror game ...,False,True,False,False,False,False,False,True,False,False,False
3256,All this fake outrage . Y'all need to stop :ro...,True,False,True,False,False,False,False,False,False,False,False
3257,Would be ever so grateful if you could record ...,False,False,False,False,True,False,False,False,False,False,False


In [42]:
model.load_weights('/content/drive/MyDrive/Models/relu2.h5')

In [43]:
from sklearn.metrics import multilabel_confusion_matrix, classification_report, hamming_loss, f1_score, accuracy_score, jaccard_score


test_auc_metrics = [tf.keras.metrics.AUC() for i in range(len(label_cols))]

for i, (token_ids, masks) in enumerate(tqdm(test_dataset)):
    labels = test_labels[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
    labels = tf.dtypes.cast(labels, tf.float32)
    
    sample_ids = test_set.iloc[i*BATCH_SIZE:(i+1)*BATCH_SIZE]['Tweet']
    predictions = model(token_ids, attention_mask=masks).numpy()

    df_result.loc[sample_ids, label_cols] = predictions

    for i, auc in enumerate(test_auc_metrics):
      auc.update_state(labels[:,i], predictions[:,i])

  0%|          | 0/204 [00:00<?, ?it/s]

In [44]:
for i, label_name in enumerate(label_cols):
    print(f"{label_name} roc_auc {test_auc_metrics[i].result()}")
    test_auc_metrics[i].reset_states()

anger roc_auc 0.9361817836761475
anticipation roc_auc 0.7635920643806458
disgust roc_auc 0.9167317152023315
fear roc_auc 0.9401597380638123
joy roc_auc 0.9533493518829346
love roc_auc 0.9215825796127319
optimism roc_auc 0.9080115556716919
pessimism roc_auc 0.8421571850776672
sadness roc_auc 0.9163305163383484
surprise roc_auc 0.804406464099884
trust roc_auc 0.7906150221824646


In [None]:
y_true = test_labels

best_thresh = 0
best_acc = 0
for thresh in np.arange(0.1, 1, 0.01):
  y_pred = df_result[label_cols].values
  y_pred = np.array([[1 if i > thresh else 0 for i in j] for j in y_pred])
  acc = jaccard_score(y_true, y_pred, average='samples')
  
  if acc > best_acc:
    best_thresh = thresh
    best_acc = acc
    
print(best_thresh)

In [47]:
from sklearn.metrics import accuracy_score, jaccard_score

y_true = test_labels
y_pred = df_result[label_cols].values

thresh = best_thresh
y_pred = np.array([[1 if i > thresh else 0 for i in j] for j in y_pred])

#print(f1_score(y_true, y_pred, average=None))

print('Micro F1',f1_score(y_true, y_pred,average='micro'))
print('Macro F1',f1_score(y_true, y_pred,average='macro'))
print('Accuracy', jaccard_score(y_true, y_pred, average='samples'))

Micro F1 0.7320033572212538
Macro F1 0.5824654375591761
Accuracy 0.6143043440143778


  _warn_prf(average, modifier, msg_start, len(result))


In [159]:
print(classification_report(y_true, y_pred))
#label_cols = ['anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust']

              precision    recall  f1-score   support

           0       0.78      0.86      0.82      1101
           1       0.39      0.31      0.34       425
           2       0.71      0.84      0.77      1099
           3       0.74      0.74      0.74       485
           4       0.83      0.90      0.86      1442
           5       0.61      0.72      0.66       516
           6       0.71      0.86      0.78      1143
           7       0.44      0.42      0.43       375
           8       0.73      0.79      0.76       960
           9       0.44      0.18      0.25       170
          10       0.35      0.08      0.13       153

   micro avg       0.71      0.76      0.73      7869
   macro avg       0.61      0.61      0.59      7869
weighted avg       0.70      0.76      0.72      7869
 samples avg       0.72      0.77      0.72      7869



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
