#### Error analysis: exploring validation examples by model loss

In [15]:
import torch
from torch.nn.functional import cross_entropy
import numpy as np
from datasets import load_dataset
import pandas as pd

from transformers import AutoModelForSequenceClassification, AutoTokenizer


In [13]:
# Print the current working directory; the model paths loaded below are relative to it.
!pwd

/home/vivek/Documents/Workshop/NLP/Transformers


In [14]:

# Checkpoint directory, relative to the notebook's working directory (judging by
# the path, a DistilBERT model fine-tuned for emotion classification).
# Defined once so the model and its tokenizer are always loaded from the same
# checkpoint and cannot drift apart when the path is edited.
CHECKPOINT_DIR = 'models/distilbert-base-uncased-finetuned-emotion/checkpoint-500/'

# Sequence-classification model and the tokenizer saved alongside it.
model = AutoModelForSequenceClassification.from_pretrained(CHECKPOINT_DIR)
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_DIR)

In [16]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model's weights to the same device: `forward_pass_with_label` sends
# the input tensors to `device`, so a model left on the CPU would fail with a
# device-mismatch error on any machine where CUDA is available.
model = model.to(device)

In [31]:
def forward_pass_with_label(batch):
    """Run the model on one batch and return per-example loss and prediction.

    Args:
        batch: Mapping of column name to tensor. Must contain a ``'label'``
            entry; of the remaining entries only those named in
            ``tokenizer.model_input_names`` are passed to the model.

    Returns:
        dict with two NumPy arrays:
        ``'loss'`` - unreduced (``reduction="none"``) cross-entropy between
        the logits and the labels, and
        ``'predicted_label'`` - index of the largest logit along the last axis.
    """
    # Keep only the columns the model accepts and place them on the target device.
    model_inputs = {}
    for name, tensor in batch.items():
        if name in tokenizer.model_input_names:
            model_inputs[name] = tensor.to(device)
    targets = batch['label'].to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**model_inputs).logits
        predictions = logits.argmax(dim=-1)
        per_example_loss = cross_entropy(logits, targets, reduction="none")

    # Bring the results back to host memory as NumPy arrays.
    return {
        'loss': per_example_loss.cpu().numpy(),
        'predicted_label': predictions.cpu().numpy(),
    }

                
        
        

In [25]:
# Load the 'SetFit/emotion' dataset; later cells use its 'train' and 'validation'
# splits and the 'text', 'label' and 'label_text' columns.
emotions = load_dataset('SetFit/emotion')

Using custom data configuration SetFit--emotion-e444b7640ce3116e
Found cached dataset json (/home/vivek/.cache/huggingface/datasets/SetFit___json/SetFit--emotion-e444b7640ce3116e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


  0%|          | 0/3 [00:00<?, ?it/s]

In [26]:
def tokenize(batch):
    """Tokenize the ``'text'`` entry of a batch with the notebook's tokenizer.

    Args:
        batch: Mapping with a ``'text'`` entry holding the raw strings.

    Returns:
        Whatever ``tokenizer`` returns for those texts, with padding enabled
        so the sequences in the batch share one length.
    """
    # truncation=True caps every sequence at the tokenizer's maximum length, so
    # an unusually long text cannot exceed what the model accepts and crash the
    # forward pass. Texts already within the limit are unaffected.
    return tokenizer(batch['text'], padding=True, truncation=True)
    
# batched=True with batch_size=None passes each split to `tokenize` as a single
# batch, so every example in a split is padded to the same length.
emotions_encoded = emotions.map(tokenize, batched=True, batch_size=None)

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [27]:
# Return these columns as torch tensors; `forward_pass_with_label` calls `.to(device)` on them.
emotions_encoded.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

In [32]:
# Add per-example 'loss' and 'predicted_label' columns to the validation split,
# running the model on 16 examples at a time.
emotions_encoded['validation'] = emotions_encoded['validation'].map(forward_pass_with_label, batched=True, batch_size=16)

  0%|          | 0/125 [00:00<?, ?ba/s]

In [36]:
# Map each integer class id to its emotion name by pairing the 'label' and
# 'label_text' columns of the training split row by row.
label_map = {
    class_id: class_name
    for class_id, class_name in zip(emotions['train']['label'],
                                    emotions['train']['label_text'])
}
label_map

{0: 'sadness', 3: 'anger', 2: 'love', 5: 'surprise', 4: 'fear', 1: 'joy'}

In [37]:
# Switch the dataset's output format to pandas so a split can be read as a DataFrame.
emotions_encoded.set_format('pandas')
cols = ['text', 'label_text', 'predicted_label', 'loss']

# Build the analysis frame with `.assign`, which works on a fresh copy.
# Assigning columns directly onto the result of `[:][cols]` writes to a frame
# derived by column selection and triggers pandas' SettingWithCopyWarning.
df_test = emotions_encoded['validation'][:][cols].assign(
    # Human-readable true label (same values as 'label_text').
    label=lambda frame: frame['label_text'],
    # Replace the predicted class id with its emotion name.
    predicted_label=lambda frame: frame['predicted_label'].apply(lambda x: label_map[x]),
)

In [39]:
# Show the ten validation examples with the highest loss, widening the column
# display limit to 400 characters so long texts are shown in full.
with pd.option_context('display.max_colwidth', 400):
    display(df_test.sort_values(by="loss", ascending=False).iloc[:10])

Unnamed: 0,text,label_text,predicted_label,loss,label
1950,i as representative of everything thats wrong with corporate america and feel that sending him to washington is a ludicrous idea,surprise,sadness,6.228292,surprise
1840,id let you kill it now but as a matter of fact im not feeling frightfully well today,joy,fear,5.907738,joy
1111,im lazy my characters fall into categories of smug and or blas people and their foils people who feel inconvenienced by smug and or blas people,joy,fear,5.867592,joy
1963,i called myself pro life and voted for perry without knowing this information i would feel betrayed but moreover i would feel that i had betrayed god by supporting a man who mandated a barely year old vaccine for little girls putting them in danger to financially support people close to him,joy,sadness,5.780801,joy
1509,i guess this is a memoir so it feels like that should be fine too except i dont know something about such a deep amount of self absorption made me feel uncomfortable,joy,fear,5.678258,joy
1870,i guess i feel betrayed because i admired him so much and for someone to do this to his wife and kids just goes beyond the pale,joy,sadness,5.546002,joy
882,i feel badly about reneging on my commitment to bring donuts to the faithful at holy family catholic church in columbus ohio,love,sadness,4.388071,love
1919,i should admit when consuming alcohol myself in small amounts i feel much less inhibited ideas come to me more easily and i can write with greater ease,fear,sadness,4.378604,fear
1500,i guess we would naturally feel a sense of loneliness even the people who said unkind things to you might be missed,anger,sadness,4.2596,anger
318,i felt ashamed of these feelings and was scared because i knew that something wrong with me and thought i might be gay,fear,sadness,4.179289,fear
