In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

base_model = "cardiffnlp/twitter-roberta-base-sentiment-latest"

tokenizer = AutoTokenizer.from_pretrained(base_model)

tokenizer = AutoTokenizer.from_pretrained(base_model, device_map="auto")
model = AutoModelForSequenceClassification.from_pretrained(base_model, load_in_8bit=True, torch_dtype=torch.float16)

model = model.eval()

Downloading config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
tokens = tokenizer(["Love it"], return_tensors='pt',
                           padding=True)
output = model(**tokens)

output = torch.nn.functional.softmax(output.logits.float(), dim=-1)
output.argmax()

tensor(2)

In [4]:
from sklearn.metrics import accuracy_score, f1_score
from datasets import load_dataset
from tqdm import tqdm
import torch
import numpy as np
import pandas as pd

def map_output(arg):
  dic = {0: 0, 1: 2, 2:4}
  return dic[arg]


def test_sentiment140(model, tokenizer, batch_size=8):
    dataset = load_dataset('sentiment140')
    dataset = dataset["test"]
    dataset = dataset.to_pandas()

    negative_df = dataset.query("sentiment == 0")[:75]
    positive_df = dataset.query("sentiment == 4")[:75]

    dataset = pd.concat([negative_df, positive_df])

    dataset = dataset.rename(columns={"sentiment": "target"})


    dataset = dataset[['text', 'target']]

    # print example
    print(f"\n\nPrompt example:\n{dataset['text'][1]}\n\n")

    context = dataset['text'].tolist()
    total_steps = dataset.shape[0]//batch_size + 1
    print(
        f"Total len: {len(context)}. Batchsize: {batch_size}. Total steps: {total_steps}")

    out_text = []

    for i in tqdm(range(total_steps)):
        tmp_context = context[i * batch_size:(i+1) * batch_size]

        tokenizer.pad_token = "[PAD]"

        tokens = tokenizer(tmp_context, return_tensors='pt',
                           padding=True)

        output = model(**tokens)
        output = torch.nn.functional.softmax(output.logits.float(), dim=-1)
        out_text.append(output.detach().numpy())
        torch.cuda.empty_cache()

    out_text = [item for sublist in out_text for item in sublist]
    dataset["out_text"] = out_text
    dataset["new_out"] = dataset["out_text"].apply(np.argmax).apply(map_output)

    acc = accuracy_score(dataset["target"], dataset["new_out"])
    f1_macro = f1_score(dataset["target"], dataset["new_out"], average="macro")
    f1_micro = f1_score(dataset["target"], dataset["new_out"], average="micro")
    f1_weighted = f1_score(
        dataset["target"], dataset["new_out"], average="weighted")

    print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}. ")

    return dataset

dataset = test_sentiment140(model, tokenizer)
dataset



Prompt example:
Reading my kindle2...  Love it... Lee childs is good read.


Total len: 150. Batchsize: 8. Total steps: 19


100%|██████████| 19/19 [00:00<00:00, 20.82it/s]

Acc: 0.96. F1 macro: 0.6421854008060904. F1 micro: 0.96. F1 weighted (BloombergGPT): 0.9632781012091357. 





Unnamed: 0,text,target,out_text,new_out
6,Fuck this economy. I hate aig and their non lo...,0,"[0.9568466, 0.035315666, 0.00783778]",0
11,@Karoli I firmly believe that Obama/Pelosi hav...,0,"[0.79764926, 0.10938268, 0.09296796]",0
14,"dear nike, stop with the flywire. that shit is...",0,"[0.80429417, 0.11192144, 0.083784364]",0
16,I was talking to this guy last night and he wa...,0,"[0.82691664, 0.10229493, 0.070788465]",0
18,"@ludajuice Lebron is a Beast, but I'm still ch...",0,"[0.80426484, 0.10715792, 0.08857715]",0
...,...,...,...,...
196,@matthewcyan I finally got around to using jqu...,4,"[0.020957025, 0.12206564, 0.8569773]",4
202,@PDubyaD right!!! LOL we'll get there!! I have...,4,"[0.021675743, 0.12757644, 0.85074776]",4
203,"RT @blknprecious1: RT GREAT @dbroos ""Someone's...",4,"[0.028540956, 0.17323063, 0.79822844]",4
205,Warren Buffet became (for a time) the richest ...,4,"[0.019736337, 0.11778954, 0.86247414]",4


In [5]:
path = '../../results/general/roberta-base-sentiment-latest.csv'

In [6]:
dataset.to_csv(path)

In [7]:
import pandas as pd

df = pd.read_csv(path, index_col=0)

true, pred = df["target"], df["new_out"]


import sys
sys.path.append('../../')
from metrics import metrics

metrics(true, pred)

Precision: 0.968, Recall: 0.96, F1: 0.963, Accuracy: 0.96


  _warn_prf(average, modifier, msg_start, len(result))
