# 以Transformers套件實作問答(Question Answering)功能

In [1]:
# 載入相關套件
from transformers import pipeline

2024-11-04 23:47:15.052436: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-04 23:47:15.062628: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1730735235.076839   37266 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1730735235.082219   37266 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-04 23:47:15.094798: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
# Load the question-answering pipeline backed by the DistilBERT SQuAD checkpoint
nlp = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [3]:
# Context passage the pipeline extracts answers from (it is not training data).
# Adjacent string literals are concatenated by the parser into one string.
context = (
    "Extractive Question Answering is the task of extracting an answer "
    "from a text given a question. An example of a question answering "
    "dataset is the SQuAD dataset, which is entirely based on that task. "
    "If you would like to fine-tune a model on a SQuAD task, you may "
    "leverage the examples/question-answering/run_squad.py script."
)

In [4]:
# Run two sample questions against `context` and print each extracted answer.
# One loop handles both questions, so the query and the report format are
# written once instead of being copy-pasted per question.
pipeline_questions = (
    "What is extractive question answering?",
    "What is a good example of a question answering dataset?",
)

for question_index, pipeline_question in enumerate(pipeline_questions):
    if question_index:
        print()  # blank line between consecutive answers

    result = nlp(question=pipeline_question, context=context)
    # print() joins its two arguments with a single space, which yields the
    # "score: <x> , start: <y>" layout.
    print(
        f"Answer: '{result['answer']}', score: {round(result['score'], 4)}",
        f", start: {result['start']}, end: {result['end']}",
    )

Answer: 'the task of extracting an answer from a text given a question', score: 0.6226 , start: 33, end: 94

Answer: 'SQuAD dataset', score: 0.5053 , start: 146, end: 159


## 結合Tokenizer

In [5]:
from transformers import AutoTokenizer, TFAutoModelForQuestionAnswering
import tensorflow as tf

# Load the tokenizer and the TensorFlow question-answering model.
# Both come from the same checkpoint, so the name is defined once.
QA_CHECKPOINT = "bert-large-uncased-whole-word-masking-finetuned-squad"

tokenizer = AutoTokenizer.from_pretrained(QA_CHECKPOINT)
model = TFAutoModelForQuestionAnswering.from_pretrained(QA_CHECKPOINT)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

I0000 00:00:1730735353.196054   37266 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
All PyTorch model weights were used when initializing TFBertForQuestionAnswering.

All the weights of TFBertForQuestionAnswering were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForQuestionAnswering for predictions without further training.


In [6]:
# Passage the model answers questions about (it is not training data).
# The value begins and ends with a newline, and every line break is explicit.
text = (
    "\n"
    "🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose\n"
    "architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural\n"
    "Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between\n"
    "TensorFlow 2.0 and PyTorch.\n"
)

In [7]:
# Questions to ask; each one is paired with `text` in the inference loop
questions = [
    "How many pretrained models are available in 🤗 Transformers?",
    "What does 🤗 Transformers provide?",
    "🤗 Transformers provides interoperability between which frameworks?",
]

In [8]:
# Predict an answer span for every question
for question in questions:
    # Encode the question together with the passage as TensorFlow tensors.
    inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="tf")
    outputs = model(inputs)

    # Positions with the highest start / end logit in the first batch row;
    # the end index is shifted by one so it can be used as an exclusive bound.
    answer_start = int(tf.argmax(outputs.start_logits, axis=1)[0])
    answer_end = int(tf.argmax(outputs.end_logits, axis=1)[0]) + 1

    # Map the selected token ids back to tokens, then to a readable string.
    input_ids = inputs["input_ids"].numpy()[0]
    answer_tokens = tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    answer = tokenizer.convert_tokens_to_string(answer_tokens)

    print(f"Question: {question}", f"Answer: {answer}\n", sep="\n")

Question: How many pretrained models are available in 🤗 Transformers?
Answer: over 32 +

Question: What does 🤗 Transformers provide?
Answer: general - purpose architectures

Question: 🤗 Transformers provides interoperability between which frameworks?
Answer: tensorflow 2. 0 and pytorch

