In [None]:
!pip install -U evaluate nltk rouge_score absl-py

In [1]:
import evaluate
import torch
import transformers
from typing import Dict
from collections import defaultdict
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import get_peft_model, LoraConfig, TaskType, PeftModel,PeftConfig
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from datasets import load_dataset, load_from_disk

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the tokenizer and the base causal LM from the local checkpoint directory.
model_id = "../base/qwen/Qwen2-0_5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False, trust_remote_code=True)

# Request 8-bit weights through a BitsAndBytesConfig: passing `load_in_8bit=`
# directly to `from_pretrained` is deprecated and slated for removal.
quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=quantization_config,
    trust_remote_code=True,
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [3]:
# Display the module tree to check that the projection layers were loaded as
# 8-bit (`Linear8bitLt`) modules.
model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (q_proj): Linear8bitLt(in_features=896, out_features=896, bias=True)
          (k_proj): Linear8bitLt(in_features=896, out_features=128, bias=True)
          (v_proj): Linear8bitLt(in_features=896, out_features=128, bias=True)
          (o_proj): Linear8bitLt(in_features=896, out_features=896, bias=False)
          (rotary_emb): Qwen2RotaryEmbedding()
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear8bitLt(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear8bitLt(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear8bitLt(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm()
        (post_attention_layernorm): Qwen2RMSNorm()
      )
    )
    (norm): Qwen2R

In [None]:
# Load the evaluation dataset that was saved to disk under the LoRA run's
# output directory (path is relative to the notebook's working directory).
data = load_from_disk("../outputs/final/Qwen2-0_5B-instruct-lora/eval_data")

In [None]:
def _as_chat(example):
    """Wrap one record's `input` text in a system + user chat message list."""
    return {
        "message": [
            {"role": "system", "content": "为下面的新闻生成摘要"},
            {"role": "user", "content": example["input"]},
        ]
    }


# Reference summaries are taken directly from the `output` column.
refs = data["output"]
# Prompts: attach a chat-formatted `message` column, then keep only that column.
with_chat = data.map(_as_chat)
messages = with_chat["message"]

In [None]:
# Generate one response per chat prompt and collect the decoded text in `preds`.
#
# Seed every RNG first so a re-run reproduces the same predictions if generation
# samples. NOTE(review): whether sampling is enabled depends on the checkpoint's
# generation config, which is not visible here - confirm.
transformers.set_seed(42)

preds = []
for message in messages:
    # Render the chat messages into a single prompt string that ends with the
    # assistant generation prefix.
    text = tokenizer.apply_chat_template(
        message,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
    )
    # The returned sequence starts with the prompt tokens; keep only the
    # continuation. The batch holds a single sequence, so index 0 is the answer.
    prompt_length = model_inputs.input_ids.shape[1]
    response = tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)
    preds.append(response)

In [None]:
import sys

# Make the parent directory importable so the local `metrics` package resolves.
# Guarded so that re-running this cell does not append duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")
from metrics.bleu.bleu import Bleu
from metrics.rouge.rouge import Rouge

# Calculate BLEU and ROUGE.
# ROUGE is loaded through `evaluate` from the local metric script, while BLEU is
# instantiated directly from the local module.
rouge = evaluate.load("../metrics/rouge")
bleu = Bleu()
print("load done")

# NOTE(review): the system prompt is Chinese, so the texts are presumably
# Chinese too. If these local metrics use the upstream default tokenizers,
# unsegmented Chinese may score near zero - confirm, and pass a character- or
# word-level `tokenizer=` to `compute` if the metrics accept one.
result_rouge = rouge.compute(predictions=preds, references=refs)
result_bleu = bleu.compute(predictions=preds, references=refs)

print("ROUGE:", result_rouge)
print("BLEU:", result_bleu)

In [None]:
# Spot-check the first five generated responses.
preds[:5]


In [None]:
# Spot-check the first five reference outputs for comparison with the predictions.
refs[:5]