In [2]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Hugging Face Hub id of the checkpoint to evaluate.
base_model = "distilbert-base-uncased-finetuned-sst-2-english"

# Load the tokenizer exactly once. A tokenizer carries no weights, so device
# placement options are not passed here; they go on the model load below.
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Load the classifier with 8-bit quantized weights. `device_map="auto"` is the
# placement option documented alongside `load_in_8bit`.
# NOTE(review): 8-bit loading presumably needs bitsandbytes and a CUDA GPU in
# this environment - confirm before running on a CPU-only machine.
model = AutoModelForSequenceClassification.from_pretrained(
    base_model,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Switch to inference mode (disables dropout). The assignment form is kept so
# the cell does not echo the whole model repr as its output.
model = model.eval()

In [3]:
from sklearn.metrics import accuracy_score, f1_score
from datasets import load_dataset
from tqdm import tqdm
import torch
import numpy as np

def change_format(x):
    """Translate a class index predicted by the model into the dataset's label id.

    Index 0 becomes label 2 and index 1 becomes label 0. Any other value
    raises ``KeyError``.
    """
    model_to_dataset_label = {1: 0, 0: 2}
    return model_to_dataset_label[x]

def test_fiqa(model, tokenizer, batch_size=8, exclude_labels=()):
    """Score a sequence-classification model on the FiQA-2018 test split.

    The 'test' split of ``pauri32/fiqa-2018`` is loaded, its ``sentence`` and
    ``label`` columns are renamed to ``input`` and ``target``, and the model is
    run over the inputs in batches. The argmax of the softmax probabilities is
    mapped through ``change_format`` and compared with ``target``.

    Args:
        model: classifier whose forward pass returns an object with ``.logits``.
        tokenizer: tokenizer matching ``model``.
        batch_size: number of sentences per forward pass (must be >= 1).
        exclude_labels: target values whose rows are dropped before scoring.
            Empty by default, so every row is evaluated.

    Returns:
        pandas.DataFrame with columns ``input``, ``target``, ``out_text``
        (per-class probabilities), ``new_out_np`` (argmax index) and
        ``new_out`` (prediction expressed as a dataset label). The original
        row index is preserved.

    Raises:
        ValueError: if ``batch_size`` < 1 or no rows remain after filtering.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    raw = load_dataset('pauri32/fiqa-2018')["test"].to_pandas()
    frame = raw[['sentence', 'label']].rename(
        columns={'sentence': 'input', 'label': 'target'})
    if len(exclude_labels) > 0:
        frame = frame[~frame['target'].isin(list(exclude_labels))]
    # Work on an explicit copy so the column assignments below never write
    # into a view of the loaded frame (avoids SettingWithCopyWarning).
    frame = frame.copy()

    if frame.empty:
        raise ValueError("no rows left to evaluate after filtering")

    # print example (positional lookup: index labels may have gaps after filtering)
    example = frame['input'].iloc[min(1, len(frame) - 1)]
    print(f"\n\nPrompt example:\n{example}\n\n")

    context = frame['input'].tolist()
    # Ceiling division: an exact multiple of batch_size must not yield an
    # extra, empty batch (the tokenizer cannot encode an empty list).
    total_steps = (len(context) + batch_size - 1) // batch_size
    print(
        f"Total len: {len(context)}. Batchsize: {batch_size}. Total steps: {total_steps}")

    # Loop-invariant setup: only supply a pad token when the tokenizer lacks one.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = "[PAD]"
    device = getattr(model, "device", None)

    batch_probs = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for start in tqdm(range(0, len(context), batch_size)):
            batch = context[start:start + batch_size]

            tokens = tokenizer(
                batch, return_tensors='pt', padding=True, truncation=True)
            if device is not None:
                # Inputs must live where the model's weights are.
                tokens = tokens.to(device)

            logits = model(**tokens).logits
            probs = torch.nn.functional.softmax(logits.float(), dim=-1)
            # .cpu() is required before .numpy() when the model runs on a GPU.
            batch_probs.append(probs.cpu().numpy())

    # Release cached GPU blocks once, after the whole pass.
    torch.cuda.empty_cache()

    all_probs = np.concatenate(batch_probs, axis=0)
    frame["out_text"] = list(all_probs)
    frame["new_out_np"] = all_probs.argmax(axis=1)
    frame["new_out"] = frame["new_out_np"].map(change_format)

    acc = accuracy_score(frame["target"], frame["new_out"])
    f1_macro = f1_score(frame["target"], frame["new_out"], average="macro")
    f1_micro = f1_score(frame["target"], frame["new_out"], average="micro")
    f1_weighted = f1_score(
        frame["target"], frame["new_out"], average="weighted")

    print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}. ")

    return frame


# Run the evaluation with the default batch size and keep the per-row results
# frame in the notebook-global `dataset`.
dataset = test_fiqa(model, tokenizer)
# Bare last expression so the notebook renders the frame as the cell output.
dataset



Prompt example:
@gakrum nice chart shows distinctive down channel not a dip.. where do you see the bottom? $SPY ..$150? ..$130?


Total len: 150. Batchsize: 8. Total steps: 19


100%|██████████| 19/19 [00:00<00:00, 22.60it/s]

Acc: 0.41333333333333333. F1 macro: 0.3131468088733046. F1 micro: 0.41333333333333333. F1 weighted (BloombergGPT): 0.3131468088733046. 





Unnamed: 0,input,target,out_text,new_out_np,new_out
0,$HCP Come to the party and buy this -gonna giv...,0,"[0.4479206, 0.5520794]",1,0
1,@gakrum nice chart shows distinctive down chan...,2,"[0.980034, 0.019966036]",0,2
2,Japan's Asahi to submit bid next week for SABM...,0,"[0.9754836, 0.024516335]",0,2
3,"Tesla Motors recalls 2,700 Model X SUVs $TSLA ...",2,"[0.99649614, 0.003503823]",0,2
4,CRH's concrete bid for Holcim Lafarge assets,0,"[0.98498523, 0.015014747]",0,2
...,...,...,...,...,...
145,Intertek swings to ÃÂ£347 mln loss on oil's s...,2,"[0.99939764, 0.0006023541]",0,2
146,RT @jan $ARNA Don't think buyout rumor strong ...,1,"[0.98866826, 0.011331754]",0,2
147,Barclays appoints JPMorgan's Paul Compton as n...,1,"[0.008140463, 0.9918595]",1,0
148,$AAPL Now I'm glad I got stopped out of my $11...,1,"[0.99687636, 0.0031236487]",0,2


In [4]:
# Persist the per-row predictions currently held in `dataset`. `to_csv` is
# called with its defaults, and the path is relative to the notebook's
# working directory.
# NOTE(review): the file name says "no-neutral" - confirm that `dataset` is the
# frame you intend to save when this cell runs, and that ../results exists.
dataset.to_csv('../results/distilbert-base-uncased-finetuned-sst-2-english-no-neutral.csv')

In [9]:
# Number of rows in `dataset` whose target label equals 1.
int(dataset["target"].eq(1).sum())

50

# Remove the neutral label (target == 1) and re-evaluate

In [16]:
from sklearn.metrics import accuracy_score, f1_score
from datasets import load_dataset
from tqdm import tqdm
import torch
import numpy as np

def change_format(x):
    """Convert a predicted class index to the label id used in ``target``.

    Mapping: 0 -> 2 and 1 -> 0. A value outside the mapping raises
    ``KeyError``.
    """
    return {0: 2, 1: 0}[x]

def test_fiqa(model, tokenizer, batch_size=8, exclude_labels=(1,)):
    """Score a sequence-classification model on the FiQA-2018 test split.

    The 'test' split of ``pauri32/fiqa-2018`` is loaded, rows whose label is in
    ``exclude_labels`` are dropped, and the ``sentence`` and ``label`` columns
    are renamed to ``input`` and ``target``. The model is run over the inputs
    in batches. The argmax of the softmax probabilities is mapped through
    ``change_format`` and compared with ``target``.

    Args:
        model: classifier whose forward pass returns an object with ``.logits``.
        tokenizer: tokenizer matching ``model``.
        batch_size: number of sentences per forward pass (must be >= 1).
        exclude_labels: target values whose rows are dropped before scoring.
            Defaults to ``(1,)``, the label this notebook treats as neutral.
            Pass ``()`` to keep every row.

    Returns:
        pandas.DataFrame with columns ``input``, ``target``, ``out_text``
        (per-class probabilities), ``new_out_np`` (argmax index) and
        ``new_out`` (prediction expressed as a dataset label). The original
        row index is preserved, so it has gaps where rows were dropped.

    Raises:
        ValueError: if ``batch_size`` < 1 or no rows remain after filtering.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    raw = load_dataset('pauri32/fiqa-2018')["test"].to_pandas()
    keep = ~raw['label'].isin(list(exclude_labels))
    # Explicit copy: the columns added below must not be written into a slice
    # of the loaded frame (avoids SettingWithCopyWarning).
    frame = (
        raw.loc[keep, ['sentence', 'label']]
        .rename(columns={'sentence': 'input', 'label': 'target'})
        .copy()
    )

    if frame.empty:
        raise ValueError("no rows left to evaluate after filtering")

    # print example (positional lookup: index labels have gaps after filtering)
    example = frame['input'].iloc[min(1, len(frame) - 1)]
    print(f"\n\nPrompt example:\n{example}\n\n")

    context = frame['input'].tolist()
    # Ceiling division: an exact multiple of batch_size must not yield an
    # extra, empty batch (the tokenizer cannot encode an empty list).
    total_steps = (len(context) + batch_size - 1) // batch_size
    print(
        f"Total len: {len(context)}. Batchsize: {batch_size}. Total steps: {total_steps}")

    # Loop-invariant setup: only supply a pad token when the tokenizer lacks one.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = "[PAD]"
    device = getattr(model, "device", None)

    batch_probs = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for start in tqdm(range(0, len(context), batch_size)):
            batch = context[start:start + batch_size]

            tokens = tokenizer(
                batch, return_tensors='pt', padding=True, truncation=True)
            if device is not None:
                # Inputs must live where the model's weights are.
                tokens = tokens.to(device)

            logits = model(**tokens).logits
            probs = torch.nn.functional.softmax(logits.float(), dim=-1)
            # .cpu() is required before .numpy() when the model runs on a GPU.
            batch_probs.append(probs.cpu().numpy())

    # Release cached GPU blocks once, after the whole pass.
    torch.cuda.empty_cache()

    all_probs = np.concatenate(batch_probs, axis=0)
    frame["out_text"] = list(all_probs)
    frame["new_out_np"] = all_probs.argmax(axis=1)
    frame["new_out"] = frame["new_out_np"].map(change_format)

    acc = accuracy_score(frame["target"], frame["new_out"])
    f1_macro = f1_score(frame["target"], frame["new_out"], average="macro")
    f1_micro = f1_score(frame["target"], frame["new_out"], average="micro")
    f1_weighted = f1_score(
        frame["target"], frame["new_out"], average="weighted")

    print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}. ")

    return frame


# Re-run the evaluation using the `test_fiqa` defined in this cell; this
# rebinds the notebook-global `dataset` to the new results frame.
dataset = test_fiqa(model, tokenizer)
# Bare last expression so the notebook renders the frame as the cell output.
dataset



Prompt example:
@gakrum nice chart shows distinctive down channel not a dip.. where do you see the bottom? $SPY ..$150? ..$130?


Total len: 100. Batchsize: 8. Total steps: 13


100%|██████████| 13/13 [00:00<00:00, 36.07it/s]

Acc: 0.63. F1 macro: 0.5960257670051317. F1 micro: 0.63. F1 weighted (BloombergGPT): 0.5960257670051317. 





Unnamed: 0,input,target,out_text,new_out_np,new_out
0,$HCP Come to the party and buy this -gonna give solid gains and a dividend $$$$$$,0,"[0.4479206, 0.5520794]",1,0
1,@gakrum nice chart shows distinctive down channel not a dip.. where do you see the bottom? $SPY ..$150? ..$130?,2,"[0.980034, 0.019966036]",0,2
2,Japan's Asahi to submit bid next week for SABMiller's Grolsch and Peroni - Yomiuri,0,"[0.9754836, 0.024516335]",0,2
3,"Tesla Motors recalls 2,700 Model X SUVs $TSLA https://t.co/F55dx4aegI",2,"[0.99649614, 0.003503823]",0,2
4,CRH's concrete bid for Holcim Lafarge assets,0,"[0.98498523, 0.015014747]",0,2
...,...,...,...,...,...
141,FDA Approves AstraZeneca's Iressa As Lung Cancer Treatment,0,"[0.8407172, 0.15928277]",0,2
142,energy still failing to close above the 10D MA. continues to stand out weak $XLE,2,"[0.999757, 0.00024298552]",0,2
143,AstraZeneca's patent on asthma drug invalidated by US court,2,"[0.9983026, 0.0016974094]",0,2
145,Intertek swings to ÃÂ£347 mln loss on oil's slump,2,"[0.99936503, 0.00063495064]",0,2
