In [1]:
!pip install evaluate
!pip install datasets
!pip install openprompt
!pip install openprompt transformers



In [2]:
import torch
import numpy as np
import random
from tqdm import tqdm
import evaluate
from datasets import load_dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [3]:
from openprompt.plms import load_plm
from openprompt import PromptDataLoader, PromptForGeneration
from openprompt.plms import T5TokenizerWrapper
from openprompt.prompts import ManualTemplate, ManualVerbalizer



In [4]:
# 固定随机
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [5]:
# cuda
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# model+tokenizer
plm, tokenizer, model_config, WrapperClass = load_plm("t5", "t5-base")
plm.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [7]:
# squad validation
dataset = load_dataset("squad", split="validation")
metric = evaluate.load("squad")

In [8]:
# 定义提示模板
template_text = 'question: {"placeholder":"text_a"} context: {"placeholder":"text_b"} Answer: {"mask"}.'
mytemplate = ManualTemplate(tokenizer=tokenizer, text=template_text)

In [10]:
prompt_model = PromptForGeneration(
    plm=plm,
    template=mytemplate,
    tokenizer=tokenizer,
    plm_eval_mode=True,
).to(device)

In [11]:
from openprompt.data_utils import InputExample

In [12]:
# 定义生成答案的函数
def generate_answer(question, context):
    #data = [{"text_a": question, "text_b": context}]
    example = InputExample(
        guid=0,  # 唯一标识符
        text_a=question,  # 问题
        text_b=context,  # 上下文
    )
    data = [example]
    dataloader = PromptDataLoader(
        dataset=data,
        tokenizer=tokenizer,
        template=mytemplate,
        tokenizer_wrapper_class=WrapperClass,
        max_seq_length=512,
        decoder_max_length=32,
        batch_size=1,
        shuffle=False,
        device=device,
    )
    with torch.no_grad():
        for batch in dataloader:
            outputs = prompt_model.generate(batch)
            decoded = tokenizer.decode(outputs, skip_special_tokens=True).strip()
            return decoded

In [13]:
predictions = []
references = []

for sample in tqdm(dataset):
    context = sample["context"]
    question = sample["question"]
    true_answers = sample["answers"]["text"]
    answer_start = sample["answers"]["answer_start"]

    pred_text = generate_answer(question, context)

    predictions.append({
        "id": sample["id"],
        "prediction_text": pred_text
    })
    references.append({
        "id": sample["id"],
        "answers": {
            "text": true_answers,
            "answer_start": answer_start
        }
    })


  0%|          | 0/10570 [00:00<?, ?it/s]
tokenizing: 1it [00:00, 302.14it/s]
  0%|          | 0/10570 [00:00<?, ?it/s]


AttributeError: 'PromptForGeneration' object has no attribute 'can_generate'

In [None]:
# 计算评估指标
results = metric.compute(predictions=predictions, references=references)
print("Exact Match:", round(results["exact_match"], 2))
print("F1 Score:", round(results["f1"], 2))