In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
from transformers import BitsAndBytesConfig
import torch

# Base checkpoint identifier and path of the trained adapter applied on top of it.
model_id = "huggyllama/llama-7b"
peft_model_id = './training/qlora-tasks-7b-v1.9.1/checkpoint-6000/adapter_model'

# Adapter configuration read from the checkpoint directory. It is kept available
# for inspection; the cells visible here do not use it further.
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model quantized to 4-bit NF4 with double quantization and
# bfloat16 compute. The quantization settings are passed only through
# `quantization_config`: supplying `load_in_4bit=True` as a separate keyword as
# well is redundant, and recent transformers releases raise a ValueError when
# both are given at the same time.
model = AutoModelForCausalLM.from_pretrained(
    model_id,  # reuse the identifier so model and tokenizer cannot drift apart
    device_map='auto',
    torch_dtype=torch.bfloat16,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
    ),
)
# Wrap the quantized base model with the adapter weights from the checkpoint.
model = PeftModel.from_pretrained(model, peft_model_id)

tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
from datasets import concatenate_datasets, load_from_disk, DatasetDict
# Load the previously saved task dataset from disk. Later cells index it by the
# 'train' / 'validation' / 'test' splits and read its 'input' and 'output' columns.
combined_tasks = load_from_disk('datasets/tasks/combined_task_dataset_io_notags/')

In [None]:
# Peek at the raw 'output' text of the first test example to see its exact
# formatting (leading space / trailing newline matter for the label filters).
combined_tasks['test'][0]['output']

In [None]:
def _is_corruption_classification(example):
    """Return True when the example's input begins with the corruption-classification tag."""
    return example['input'].startswith('[korrupció klasszifikáció]')


# Keep only the corruption-classification examples in every split.
combined_tasks = combined_tasks.filter(_is_corruption_classification)

In [None]:
def _is_positive_example(example):
    """Return True when the example's raw output is exactly the positive label text."""
    return example['output'] == ' korrupció\n'


def _is_negative_example(example):
    """Return True for every example whose raw output is not the positive label text."""
    return not _is_positive_example(example)


# Split the filtered dataset into the positive class and everything else.
pos_tasks = combined_tasks.filter(_is_positive_example)
neg_tasks = combined_tasks.filter(_is_negative_example)

In [None]:
# Display the negative-class subset as the cell output (bare last expression).
neg_tasks

In [None]:
# Keep only the first N positive examples of each split. The requested size is
# capped at the split's actual length: `select` raises IndexError when asked for
# indices beyond the end of the dataset, which would otherwise break this cell
# whenever a split holds fewer positives than requested.
for split, limit in (('train', 250), ('validation', 150), ('test', 75)):
    subset = pos_tasks[split]
    pos_tasks[split] = subset.select(range(min(limit, len(subset))))

In [None]:
from datasets import concatenate_datasets, load_dataset, DatasetDict

# Build the evaluation dataset: for each split, the (sub-sampled) positive
# examples come first, followed by all negative examples.
real_dataset = DatasetDict({
    split_name: concatenate_datasets([pos_tasks[split_name], neg_tasks[split_name]])
    for split_name in ('train', 'validation', 'test')
})

# Show the resulting splits and their sizes as the cell output.
real_dataset

In [None]:
from tqdm import tqdm
device = "cuda:0"

# Running counters, kept as notebook-level names so later cells can read them.
n = 0  # samples actually evaluated (not skipped)
k = 0  # predictions that exactly match the stripped reference output
i = 0  # predictions that are neither of the two known labels

# Confusion-matrix cells. A prediction that is not one of the two labels is
# counted in `n` and `i` but in none of these, so precision / recall / accuracy
# below are computed over well-formed predictions only.
tp = 0
fp = 0
tn = 0
fn = 0

positive = 'korrupció'
negative = 'egyéb'


def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero.

    Early in the run (or when every sample is skipped) a denominator can still
    be zero; returning NaN keeps the progress report from aborting the loop
    with a ZeroDivisionError.
    """
    return numerator / denominator if denominator else float('nan')


test_split = real_dataset['test']
for task_in, task_out in tqdm(zip(test_split['input'], test_split['output']), total=len(test_split)):
    if not task_in.startswith('[korrupció klasszifikáció]'):
        continue
    with torch.cuda.amp.autocast():
        inputs = tokenizer(task_in, return_tensors="pt").to(device)
        # Skip prompts longer than 2048 tokens (presumably the model's context
        # window -- confirm).
        if len(inputs['input_ids'][0]) > 2048:
            continue
        outputs = model.generate(**inputs, max_new_tokens=4)
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    n += 1
    # The decoded text contains the prompt followed by the generation; the
    # answer is taken as whatever follows the last ':' (plus one separator
    # character), with surrounding whitespace removed.
    sresult = result[result.rfind(':') + 2:].strip()
    slabel = task_out.strip()

    if sresult not in (positive, negative):
        i += 1

    if sresult == positive and slabel == positive:
        tp += 1
    elif sresult == negative and slabel == negative:
        tn += 1
    elif sresult == positive and slabel == negative:
        fp += 1
    elif sresult == negative and slabel == positive:
        fn += 1

    if slabel == sresult:
        k += 1

    # Progress report after every 10th evaluated sample.
    if n % 10 == 0 and n > 0:
        print(_ratio(k, n), k, n, i)
        print('precision', _ratio(tp, tp + fp))
        print('recall', _ratio(tp, tp + fn))
        print('accuracy', _ratio(tp + tn, tp + tn + fp + fn))
        print(tp, tn, fp, fn)


print(_ratio(k, n), k, n, i)

print(tp, tn, fp, fn)

In [None]:
# Final metrics from the confusion-matrix counters accumulated by the
# evaluation loop. Each ratio falls back to NaN when its denominator is zero
# (e.g. the model never predicted the positive class), instead of raising
# ZeroDivisionError.
print('precision', tp / (tp + fp) if (tp + fp) else float('nan'))
print('recall', tp / (tp + fn) if (tp + fn) else float('nan'))
print('accuracy', (tp + tn) / (tp + tn + fp + fn) if (tp + tn + fp + fn) else float('nan'))