## 3.5. 모델 추론하기

- 파이프라인을 활용한 추론
- 직접 추론

In [6]:
from transformers import pipeline
from datasets import load_dataset
from huggingface_hub import notebook_login


# Hub repository of the fine-tuned KLUE-YNAT classification checkpoint.
model_name = "asanobm/roberta-base-klue-ynat-classification"

# NOTE(review): the tokenizer is loaded from "klue/bert-base", not from
# model_name -- confirm that its vocabulary matches the fine-tuned checkpoint.
model_pipeline = pipeline('text-classification', model=model_name, tokenizer="klue/bert-base")

dataset = load_dataset("klue", "ynat")

# Index rows first, then the column: only the five requested records are
# read, instead of indexing the whole 'title' column and slicing afterwards.
model_pipeline(dataset['train'][:5]['title'])


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'label': 'LABEL_3', 'score': 0.40618231892585754},
 {'label': 'LABEL_3', 'score': 0.9865808486938477},
 {'label': 'LABEL_2', 'score': 0.5443487167358398},
 {'label': 'LABEL_0', 'score': 0.5110354423522949},
 {'label': 'LABEL_3', 'score': 0.9928354620933533}]

In [None]:
# 직접 추론하기

import torch
from torch.nn.functional import softmax
from transformers import AutoModelForSequenceClassification, AutoTokenizer

class YNATPipeline:
    """Hand-written text-classification pipeline around a sequence classifier.

    Instances are callable: given a string or a batch of strings they return
    one ``{"label": ..., "score": ...}`` dict per input text.
    """

    def __init__(self, model_name):
        """Load the model and tokenizer identified by ``model_name``.

        Args:
            model_name: Identifier passed to both
                ``AutoModelForSequenceClassification.from_pretrained`` and
                ``AutoTokenizer.from_pretrained``.
        """
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Inference only: put the module in evaluation mode.
        self.model.eval()

    def __call__(self, text):
        """Classify ``text`` and return the top label with its probability.

        Args:
            text: A single string or a list/tuple of strings.

        Returns:
            A list with one dict per input text. ``"label"`` is looked up in
            ``self.model.config.id2label`` and ``"score"`` is the softmax
            probability of that label as a Python float. An empty list or
            tuple yields ``[]``.
        """
        # An empty batch has nothing to tokenize or score; return early
        # instead of handing an empty batch to the tokenizer and model.
        if isinstance(text, (list, tuple)) and not text:
            return []

        encoded = self.tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
        )

        # No gradients are needed for prediction.
        with torch.no_grad():
            logits = self.model(**encoded).logits

        probabilities = softmax(logits, dim=-1)
        # Highest probability and its class index along the last dimension.
        top_scores, top_indices = torch.max(probabilities, dim=-1)

        id2label = self.model.config.id2label
        return [
            {"label": id2label[index], "score": score}
            for index, score in zip(top_indices.tolist(), top_scores.tolist())
        ]

# Build the hand-written pipeline for the same checkpoint, then classify the
# title of the first training example.
ynat_pipeline = YNATPipeline(model_name=model_name)
ynat_pipeline(
    dataset['train'][0]['title']
)