In [1]:
# !pip install -q -U transformers
# !pip install -q -U accelerate
# !pip install -q -U bitsandbytes
# !pip install pandas
# !pip install datasets
# # # also installed ipykernel

In [2]:
import torch
from transformers import pipeline, GenerationConfig

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import pandas as pd
from datasets import Dataset
json_path='C:/Users/marko/OneDrive/바탕 화면/semeval/Task 6 - windows/data/val.model-aware.v2.json'
df = pd.read_json(json_path)
df["label"] = df["label"].map({"Hallucination":1, "Not Hallucination":0})
df = df.dropna(how="any", axis=1)
# remove other columns here
df = df[['hyp','label','model']]
df.head()


Unnamed: 0,hyp,label,model
0,A sloping top .,1,ltg/flan-t5-definition-en-base
1,To react too much .,0,ltg/flan-t5-definition-en-base
2,The process of spoiling ; the state of being s...,1,ltg/flan-t5-definition-en-base
3,To arrange in a particular way .,1,ltg/flan-t5-definition-en-base
4,A feeling of concern ; a feeling of anxiety .,1,ltg/flan-t5-definition-en-base


In [4]:
def add_prompt_and_features(example):
    prompt = "Classify the following text as either hallucinating or not hallucinating: "
    example['input_text'] = prompt + example['hyp'] + " Model: " + str(example['model'])
    return example

dataset = Dataset.from_pandas(df)
dataset = dataset.map(add_prompt_and_features)


Map:   0%|          | 0/501 [00:00<?, ? examples/s]

Map: 100%|██████████| 501/501 [00:00<00:00, 16359.64 examples/s]


In [5]:
dataset[0]

{'hyp': 'A sloping top .',
 'label': 1,
 'model': 'ltg/flan-t5-definition-en-base',
 'input_text': 'Classify the following text as either hallucinating or not hallucinating: A sloping top . Model: ltg/flan-t5-definition-en-base'}

In [6]:
from transformers import AutoTokenizer
# i tried using beta instead of alpha but the kernel kept dying
base_model = 'stabilityai/stablelm-zephyr-3b'

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

def tokenize_function(examples):
    return tokenizer(examples['input_text'], padding='max_length', truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch", columns=["input_ids", "attention_mask", "labels"])


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Map: 100%|██████████| 501/501 [00:00<00:00, 1260.68 examples/s]


In [7]:
# pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, device_map="auto")

# # We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
# messages = [
#     {
#         "role": "system",
#         "content": "You are a friendly chatbot who always responds in the style of a pirate",
#     },
#     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
# ]
# prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
# print(outputs[0]["generated_text"])

In [8]:
# !nvidia-smi
# torch.cuda.is_available()
torch.cuda.current_device()

0

In [9]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)


# model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Get quantized model.
bnb_config = BitsAndBytesConfig(  
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)

model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
)


In [10]:
tokenized_datasets

Dataset({
    features: ['hyp', 'labels', 'model', 'input_text', 'input_ids', 'attention_mask'],
    num_rows: 501
})

In [11]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(tokenized_datasets, batch_size=1, shuffle=True)

In [16]:
from transformers import AdamW, get_scheduler
from tqdm.auto import tqdm

optimizer = AdamW(model.parameters(), lr=5e-5)

num_epochs = 5
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)

model.train()
for epoch in range(num_epochs):
    progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")
    for batch in progress_bar:
        print(batch)
        if 'labels' not in batch:
            raise ValueError("Batch does not contain 'labels'")
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.set_postfix({"loss": loss.item()})


Epoch 1/5:   0%|          | 0/501 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/501 [00:00<?, ?it/s]


{'labels': tensor([0]), 'input_ids': tensor([[4947, 1419,  253,  ...,    0,    0,    0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0]])}


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
from sklearn.metrics import accuracy_score

model.eval()
eval_dataloader = DataLoader(dataset, batch_size=1)

predictions, true_labels = [], []
with torch.no_grad():
    for batch in eval_dataloader:
        outputs = model(**batch)
        logits = outputs.logits
        predictions.extend(torch.argmax(logits, dim=-1).tolist())
        true_labels.extend(batch['labels'].tolist())

accuracy = accuracy_score(true_labels, predictions)
print(f'Accuracy: {accuracy}')
