In [35]:
from transformers import AutoModelForQuestionAnswering, AutoConfig, AutoTokenizer
from datasets import load_from_disk

import torch

In [30]:
MODEL_NAME = "klue/bert-base"

config = AutoConfig.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)

Downloading:   0%|          | 0.00/425 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/495k [00:00<?, ?B/s]

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertForQuestionAnswering: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model chec

In [4]:
datasets = load_from_disk("/opt/ml/data/train_dataset")

In [5]:
train_dataset = datasets["train"]
print(train_dataset)

Dataset({
    features: ['title', 'context', 'question', 'id', 'answers', 'document_id', '__index_level_0__'],
    num_rows: 3952
})


In [6]:
train_dataset["answers"][0]

{'answer_start': [235], 'text': ['하원']}

In [39]:
tokenized_sentence = tokenizer(
    train_dataset["question"][0],
    train_dataset["context"][0],
    return_tensors="pt"
)

In [40]:
tokenized_sentence.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

In [51]:
start_positions = torch.tensor([235])
end_positions = torch.tensor([238])
labels = torch.tensor([[235, 238]])

out = model(
    **tokenized_sentence, 
    start_positions=start_positions, 
    end_positions=end_positions,
    return_dict=True
    )

TypeError: forward() got an unexpected keyword argument 'labels'

In [48]:
out

QuestionAnsweringModelOutput(loss=tensor(6.0589, grad_fn=<DivBackward0>), start_logits=tensor([[-0.3059, -0.9222, -0.2331, -0.8421, -0.9025, -1.1790, -0.7388, -0.3408,
         -0.1035, -0.5049, -0.2560,  0.0613, -0.5251, -0.2891, -0.7815,  0.2120,
         -0.6595, -0.2199, -1.0516, -0.5320, -0.0349, -0.0700, -0.9708, -0.2157,
         -0.2859,  0.7928,  0.6218, -0.2001, -0.3493, -0.3769, -0.3863, -0.0763,
          0.2019,  0.0544, -0.1868,  0.2715,  0.1691, -0.8376, -0.1143, -0.3292,
         -0.7497,  0.3955, -0.0778,  0.0379, -0.6412, -0.0810, -0.5769, -0.2044,
         -0.0623, -0.6679, -0.1289, -0.4068,  0.0559, -0.5087, -0.4809, -0.5339,
         -0.1168,  0.0655, -0.1178,  0.0824, -0.2020, -0.2079,  0.2262,  0.1166,
         -0.5492, -0.1458, -0.2022, -0.0715,  0.2656, -0.1173, -0.5548,  0.0815,
          0.0676, -0.6508, -0.0288, -0.0987, -1.2160,  0.1511, -0.2197,  1.0742,
          0.0835,  0.1725,  0.1395, -0.1228, -0.1613, -0.7191, -0.8271, -0.2049,
          0.3245,  0.1

In [13]:
from transformers import pipeline

In [14]:
nlp = pipeline("question-answering")

Downloading:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/261M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [15]:
context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
a model on a SQuAD task, you may leverage the `run_squad.py`.
"""

In [17]:
out = nlp(question="What is extractive question answering?", context=context)
print(out)

{'score': 0.6222434043884277, 'start': 34, 'end': 95, 'answer': 'the task of extracting an answer from a text given a question'}


In [18]:
print(type(out))

<class 'dict'>


In [19]:
print(nlp)

<transformers.pipelines.question_answering.QuestionAnsweringPipeline object at 0x7fb82e1b7b20>
