In [2]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

base_model = "Seethal/sentiment_analysis_generic_dataset"

# Load the tokenizer exactly once. `device_map` is a model-placement option,
# so it is passed to the model load below instead of to the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(base_model)

# 8-bit weights with fp16 compute; `device_map="auto"` lets the loader decide
# where the quantized weights are placed.
model = AutoModelForSequenceClassification.from_pretrained(
    base_model,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# The notebook only runs inference, so put the model in evaluation mode.
model = model.eval()

In [75]:
from sklearn.metrics import accuracy_score, f1_score
from datasets import load_dataset
from tqdm import tqdm
import torch
import numpy as np

def change_format(x):
    """Flip a class index between the model's and the dataset's label order.

    The mapping is 0 -> 2, 1 -> 1, 2 -> 0. Any other value raises ``KeyError``.
    """
    flipped_index = dict(zip((0, 1, 2), (2, 1, 0)))
    return flipped_index[x]

def test_fiqa(model, tokenizer, batch_size=8):
    """Evaluate a sequence-classification model on the FiQA-2018 test split.

    Loads the ``test`` split of ``pauri32/fiqa-2018``, runs ``model`` over the
    ``sentence`` column in batches, and compares the arg-max class (remapped
    through ``change_format``) with the dataset's ``label`` column.

    Args:
        model: Callable model whose output exposes ``.logits``.
        tokenizer: Tokenizer matching ``model``. Its ``pad_token`` is set to
            ``"[PAD]"`` as a side effect.
        batch_size: Number of sentences scored per forward pass (must be >= 1).

    Returns:
        A pandas DataFrame with the columns ``input``, ``target``,
        ``out_text`` (per-class softmax probabilities), ``new_out_np``
        (arg-max index of ``out_text``) and ``new_out`` (``new_out_np`` after
        ``change_format``).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    raw = load_dataset('pauri32/fiqa-2018')["test"].to_pandas()

    # Build a fresh two-column frame instead of relabelling a slice, so the
    # column assignments further down never operate on a view of `raw`.
    dataset = raw[['sentence', 'label']].rename(
        columns={'sentence': 'input', 'label': 'target'})

    # print example (guarded so a tiny split cannot raise on a missing row)
    if len(dataset) > 1:
        print(f"\n\nPrompt example:\n{dataset['input'].iloc[1]}\n\n")

    context = dataset['input'].tolist()
    # Ceiling division: `n // batch_size + 1` would schedule an extra, empty
    # batch whenever n is an exact multiple of batch_size.
    total_steps = (len(context) + batch_size - 1) // batch_size
    print(
        f"Total len: {len(context)}. Batchsize: {batch_size}. Total steps: {total_steps}")

    # Loop-invariant, so set once up front rather than on every batch.
    tokenizer.pad_token = "[PAD]"

    # Inputs are moved to the model's device when the model exposes one.
    device = getattr(model, "device", None)

    batch_probs = []

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for i in tqdm(range(total_steps)):
            tmp_context = context[i * batch_size:(i + 1) * batch_size]

            # truncation=True keeps over-long inputs within the tokenizer's
            # configured maximum length.
            tokens = tokenizer(
                tmp_context, return_tensors='pt', padding=True, truncation=True)
            if device is not None:
                tokens = tokens.to(device)

            output = model(**tokens)
            probs = torch.nn.functional.softmax(output.logits.float(), dim=-1)
            # .cpu() makes the NumPy conversion valid for accelerator tensors.
            batch_probs.append(probs.cpu().numpy())

    # Release cached accelerator memory once, after all batches are scored.
    torch.cuda.empty_cache()

    # Flatten the list of (batch, classes) arrays into one row per sentence.
    out_text = [row for probs in batch_probs for row in probs]
    dataset["out_text"] = out_text
    dataset["new_out_np"] = dataset["out_text"].apply(np.argmax)
    dataset["new_out"] = dataset["new_out_np"].apply(change_format)

    acc = accuracy_score(dataset["target"], dataset["new_out"])
    f1_macro = f1_score(dataset["target"], dataset["new_out"], average="macro")
    f1_micro = f1_score(dataset["target"], dataset["new_out"], average="micro")
    f1_weighted = f1_score(
        dataset["target"], dataset["new_out"], average="weighted")

    print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}. ")

    return dataset


# Score the loaded model on FiQA with the default batch size, then show the
# per-sentence predictions as the cell output.
dataset = test_fiqa(model=model, tokenizer=tokenizer)
dataset



Prompt example:
@gakrum nice chart shows distinctive down channel not a dip.. where do you see the bottom? $SPY ..$150? ..$130?


Total len: 150. Batchsize: 8. Total steps: 19


100%|██████████| 19/19 [00:00<00:00, 33.59it/s]

Acc: 0.4266666666666667. F1 macro: 0.42695701936688657. F1 micro: 0.42666666666666675. F1 weighted (BloombergGPT): 0.42695701936688657. 





Unnamed: 0,input,target,out_text,new_out_np,new_out
0,$HCP Come to the party and buy this -gonna giv...,0,"[0.0030570752, 0.01366078, 0.98328215]",2,0
1,@gakrum nice chart shows distinctive down chan...,2,"[0.009738453, 0.058917087, 0.9313444]",2,0
2,Japan's Asahi to submit bid next week for SABM...,0,"[0.00080220663, 0.9968606, 0.0023371843]",1,1
3,"Tesla Motors recalls 2,700 Model X SUVs $TSLA ...",2,"[0.7519417, 0.23655118, 0.011507181]",0,2
4,CRH's concrete bid for Holcim Lafarge assets,0,"[0.0006290502, 0.99589646, 0.003474464]",1,1
...,...,...,...,...,...
145,Intertek swings to ÃÂ£347 mln loss on oil's s...,2,"[0.9799399, 0.01677867, 0.0032814075]",0,2
146,RT @jan $ARNA Don't think buyout rumor strong ...,1,"[0.017816206, 0.09909222, 0.88309157]",2,0
147,Barclays appoints JPMorgan's Paul Compton as n...,1,"[0.010575334, 0.12434536, 0.8650793]",2,0
148,$AAPL Now I'm glad I got stopped out of my $11...,1,"[0.0021437695, 0.008225937, 0.9896303]",2,0


In [3]:
# Name the results file after the model that was actually evaluated, so the
# predictions are not saved under (or written over) another model's file name.
results_path = f"../results/{base_model.split('/')[-1]}.csv"
dataset.to_csv(results_path)

In [76]:
import pandas as pd

# Lift the column-width limit so cell text is rendered without truncation,
# then display the predictions frame.
pd.options.display.max_colwidth = None
dataset

Unnamed: 0,input,target,out_text,new_out_np,new_out
0,$HCP Come to the party and buy this -gonna give solid gains and a dividend $$$$$$,0,"[0.0030570752, 0.01366078, 0.98328215]",2,0
1,@gakrum nice chart shows distinctive down channel not a dip.. where do you see the bottom? $SPY ..$150? ..$130?,2,"[0.009738453, 0.058917087, 0.9313444]",2,0
2,Japan's Asahi to submit bid next week for SABMiller's Grolsch and Peroni - Yomiuri,0,"[0.00080220663, 0.9968606, 0.0023371843]",1,1
3,"Tesla Motors recalls 2,700 Model X SUVs $TSLA https://t.co/F55dx4aegI",2,"[0.7519417, 0.23655118, 0.011507181]",0,2
4,CRH's concrete bid for Holcim Lafarge assets,0,"[0.0006290502, 0.99589646, 0.003474464]",1,1
...,...,...,...,...,...
145,Intertek swings to ÃÂ£347 mln loss on oil's slump,2,"[0.9799399, 0.01677867, 0.0032814075]",0,2
146,RT @jan $ARNA Don't think buyout rumor strong enough too be driving this up just yet...when it is this may fly like aapl...// fly how high?,1,"[0.017816206, 0.09909222, 0.88309157]",2,0
147,Barclays appoints JPMorgan's Paul Compton as new COO,1,"[0.010575334, 0.12434536, 0.8650793]",2,0
148,$AAPL Now I'm glad I got stopped out of my $113 weeklies when I did. Will re-enter next Monday for the #earnings run-up.,1,"[0.0021437695, 0.008225937, 0.9896303]",2,0
