In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

wczytanie modelu również z kwantyzacją

In [2]:
base_model_id = "mistralai/Mistral-7B-Instruct-v0.1"
base_model_id = "NousResearch/Llama-2-7b-chat-hf"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [3]:
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,  # Mistral, same as before
    quantization_config=bnb_config,  # Same quantization config as before
    device_map="auto",
    trust_remote_code=True,
    use_auth_token=True
)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [4]:
tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True)

wczytanie dotrenowanego modelu

In [5]:
from peft import PeftModel

model = PeftModel.from_pretrained(model, "llama-inz/checkpoint-200")

In [6]:
def formatting_func(example):
    text = f"""[INST]Given a context, score a comment from 0 to 9. Respond with just one number and nothing else.
    
    ### context: {example['title']} {example['post_text']}
    ### comment: {example['selftext']}[/INST] """
    return text

def generate_and_tokenize_prompt(prompt):
    return tokenizer(formatting_func(prompt))

zbiór testowy

po 100 przykłądów z każdą wartością metryki

In [13]:
from datasets import load_dataset

dataset = load_dataset('json', data_files='datasets/prepared_dataset_256max_small/reddit_posts_test.json')
dataset.shuffle()

DatasetDict({
    train: Dataset({
        features: ['title', 'post_text', 'grade', 'selftext'],
        num_rows: 1000
    })
})

wygenerowanie odpowiedzi modelu

In [10]:
import time

grades = []
prev = time.time()
start = prev
for i in range(1000):
    if i % 50 == 0:
        now = time.time()
        print(i, '/ 1000', f'{50/(now - prev)}/s', f'time: {now - start}', f'left: {(1000-i)/50*(now - prev)}')
        prev = time.time()
    eval_prompt = formatting_func(dataset['train'][i])
    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
    model.eval()
    with torch.no_grad():
        out = tokenizer.decode(model.generate(**model_input, max_new_tokens=1, pad_token_id=tokenizer.eos_token_id)[0],
                               skip_special_tokens=True)
        pred_grade = out[-1]
        grades.append([pred_grade, dataset['train'][i]['grade']])

0 / 1000 437818.7891440501/s time: 0.00011420249938964844 left: 0.0022840499877929688
50 / 1000 5.3708595262205465/s time: 9.309647560119629 left: 176.8804407119751
100 / 1000 5.413454570094574/s time: 18.545953035354614 left: 166.25243425369263
150 / 1000 5.405153518945791/s time: 27.79644751548767 left: 157.25732803344727
200 / 1000 5.488141906102107/s time: 36.90706157684326 left: 145.76882553100586
250 / 1000 5.458278545202735/s time: 46.067522048950195 left: 137.40595936775208
300 / 1000 5.2373348408039275/s time: 55.614428758621216 left: 133.65576601028442
350 / 1000 5.477302952114996/s time: 64.74307179450989 left: 118.67154431343079
400 / 1000 5.373172446708501/s time: 74.04862308502197 left: 111.66587448120117
450 / 1000 5.4229314935835236/s time: 83.26879715919495 left: 101.42115950584412
500 / 1000 5.390103397667707/s time: 92.5451226234436 left: 92.76259899139404
550 / 1000 5.364516934555028/s time: 101.86569046974182 left: 83.88453340530396
600 / 1000 5.418011253972274/s t

sprawdzenie idelnych trafień modelu

In [18]:
total = 0
c = 0
errors = 0
for x in grades:
    try:
        if int(x[0]) == x[1]:
            total += 1
    except:
        errors += 1
        continue
    c += 1

print('procent skuteczności', total/c)
print('błędy:', errors)

procent skuteczności 0.295
błędy: 0


trafienie z pozwoleniem na błąd

różnica o 1 - 2/3 

różnica o 2 - 1/3

In [17]:
total = 0
t = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
c = 0
errors = 0
for x in grades:
    try:
        a = int(x[0]) - x[1]
        if a < 0:
            a = -a
        if a == 0:
            total += 1
        elif a < 3:
            total += 0.33 * (3-a)
        t[a] += 1
        c += 1
    except:
        errors += 1
        continue
    

print('procent skuteczności', total/c)
print('błędy:', errors)
print('tabelka różnic, od 0 po lewej do 9', t)

procent skuteczności 0.5190700000000041
błędy: 0
tabelka różnic, od 0 po lewej do 9 [295, 267, 145, 101, 16, 15, 28, 9, 66, 58]
