In [4]:
from sklearn.model_selection import train_test_split
import pandas as pd
from transformers import RobertaTokenizer
import torch
from tqdm import tqdm
torch.cuda.empty_cache()
import numpy as np
from sklearn.metrics import f1_score

from torch.utils.data import TensorDataset
from transformers import BertTokenizer

from transformers import BertForSequenceClassification
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import time


In [2]:
labels_dict = {'ANGRY':0, 
          'DISGUST':1, 
          'FEAR':2, 
          'HAPPY':3, 
          'SAD':4, 
          'SURPRISE':5}

# Enter your sequence here:
sequence = [
    "I'M SO GONNA KILL YOU YOU PRICK!!!",
    "Yucks.. gross...",
    "I'm hiding at home till this is over.",
    'I LOVE DONUTS',
    "A pity... a pity.",
    "WHAT?! When did this happen!!!"    
]
df = pd.read_csv('../data/labelled.csv', encoding='ISO-8859-1')
df['label'] = df['emotion'].apply(lambda x: labels_dict[x])

In [3]:
texts = list(df.text)[:1000]
labels = list(df.label)[:1000]

In [5]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                          do_lower_case=True)

In [7]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                      num_labels = 6,
                                                      output_attentions=False,
                                                      output_hidden_states=False)
PATH = './bert emotion/BERT_emotion_epoch_10.pt'
model.load_state_dict(torch.load(PATH, map_location=torch.device('cpu')), strict=False)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

<All keys matched successfully>

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print(device)

cuda


In [9]:
def evaluate(valid_dataloader):
    model.eval()

    total_eval_loss = 0
    y_hat, y = [], []

    for batch in tqdm(valid_dataloader):
        batch = tuple(b.to(device) for b in batch)

        inputs = {'input_ids' : batch[0],
                'attention_mask': batch[1],
                'labels' : batch[2]
                }
        
        with torch.no_grad():
            outputs = model(**inputs)

        loss = outputs[0]
        logits = outputs[1]
        total_eval_loss += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        y_hat.append(logits)
        y.append(label_ids)

    avg_eval_loss = total_eval_loss/len(valid_dataloader) 

    y_hat = np.concatenate(y_hat, axis=0)
    y = np.concatenate(y, axis=0)
            
    return avg_eval_loss, y_hat, y

In [8]:
#Per Class Accuracy
def acpc(preds, labels):
    code_dict = {val:key for key,val in labels_dict.items()}

    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    for idx in np.unique(labels_flat):
        y_hat = preds_flat[labels_flat==idx]
        y = labels_flat[labels_flat==idx]
        print(f'Class: {code_dict[idx]}')
        print(f'Accuracy: {len(y_hat[y_hat==idx])/ len(y)}\n')

In [9]:
valid_encode = tokenizer.batch_encode_plus(
    texts,
    pad_to_max_length=True,
#     return_attention_masks=True,
    truncation=True,
    add_special_tokens=True,
    max_length=256,   
    return_tensors='pt'
)
valid_input = valid_encode['input_ids']
valid_attention = valid_encode['attention_mask']
valid_labels = torch.tensor(labels)

valid_data = TensorDataset(valid_input,
                          valid_attention,
                          valid_labels)

valid_dataloader = DataLoader(valid_data,
                              sampler = SequentialSampler(valid_data),
                              batch_size = 8)




In [10]:
start = time.time()
_, predictions, actual = evaluate(valid_dataloader)
print(acpc(predictions, actual))
labs = {v:k for k,v in labels_dict.items()}
for i in tqdm(range(len(predictions))):
    print(labs[np.argmax(predictions[i])], texts[i])
    print()
    
print(time.time() - start)

100%|████████████████████████████████████████████████████████████████████████████████| 125/125 [00:23<00:00,  5.38it/s]
 39%|██████████████████████████████                                               | 390/1000 [00:00<00:00, 3799.77it/s]

Class: ANGRY
Accuracy: 0.3325526932084309

Class: DISGUST
Accuracy: 0.45794392523364486

Class: FEAR
Accuracy: 0.6285714285714286

Class: HAPPY
Accuracy: 0.7317073170731707

Class: SAD
Accuracy: 0.42857142857142855

Class: SURPRISE
Accuracy: 0.3951048951048951

None
DISGUST Scum of the earth.

SURPRISE WOW. My  @Name Hidden  car parade must have 10,000 + in Vegas today. Miles of cars!!! Miles. Will dominate Vegas Strip for hours! Weâre ready to fight! Let the battle begin. Iâm leading in Maserati with American flag flying and my beautiful fiancÃ© Cindy driving. #MAGA

SURPRISE White House: Trump Signs Memo to Block Antifa Members From Entering US

FEAR Can't you just feel the sheer panic from Democrats? Tomorrow is going to be HUGE. Just watch ð

ANGRY Looking at all the pictures from the people who stormed the House chamber today - looks like ANTIFA in disguise to me.

HAPPY Could not be more clear

HAPPY Seems like this guy needs to be found and Wadded u

HAPPY Make her famous

100%|████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 4061.33it/s]


DISGUST Lindsey Graham is a TRAITOR. Lindsey is going to jail.

FEAR Lin Wood got suspended permamnently from twatter so he used this other account for a bit but dorsey got that account now too

DISGUST The traitors stages the whole event. They all have blood on their hands.

SURPRISE BREAKING EXCLUSIVE: Evidence China Was Colluding with the Bidens and Providing Information on How to Defeat President Trump in the 2020 Election


SURPRISE Emergency Tweet By Trump: The Vice President Has The Power To Reject Fraudulently Chosen Electors

SURPRISE WATCH: Stacey Abrams Brags About Lack of 'Exact' Signature Matching in Georgia

ANGRY Mitt Romney, on a flight full of patriots on their way to DC chanting âTraitor!â

DISGUST Pence Betrayed General Flynn in 2017 and Today He Betrayed President Trump and Americathis asshole use to be a democrat. once a democrat.. forever a shit head democrat

FEAR HITMAN TURNED WHISTLEBLOWER LARRY NICHOLS CLAIMS CLINTONS GAVE HIM DIRECT COMMANDS TO KILL WITN


