# Outline

In [None]:
import torch
import transformers

classifier = transformers.pipeline("sentiment-analysis")
classifier(
    [
        "I've been waiting for a HuggingFace course my whole life.",
        "I hate this so much!",
    ]
)

# Behind the Pipeline API

4 Stage Pipeline
1. dataset
2. language tokenizer
3. pretrained_model
4. model output
![](https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline.svg)

## 1. initialize tokenizer & model

In [None]:
raw_inputs = [
    "I love you so much",   # 5 Words
    "screw you",            # 2 Words
]

In [None]:
import transformers

tokenizer             = transformers. AutoTokenizer.                     from_pretrained("bert-base-cased")

model_general         = transformers. AutoModel.                         from_pretrained("bert-base-cased")
model_classification  = transformers. AutoModelForSequenceClassification.from_pretrained("bert-base-cased")

## 2. tokenizer & input to model

In [None]:
tokenized_output = tokenizer(raw_inputs , padding=True , return_tensors="pt")    # Numeric ids => as PYTORCH TENSORS

print(tokenized_output['input_ids'])

In [None]:
print(f'tokenizer returns multiple things things => {tokenized_output.keys()}')

index = 0
while index < len(tokenized_output['input_ids']):
    print(f"Row Number => {index+1}, \n\t input_ids \t=> { tokenized_output['input_ids'][index]}, \n\t attention_mask => {tokenized_output['attention_mask'][index]}")
    index = index + 1

## 3. model output

In [None]:
classification_output = model_classification( **tokenized_output )
print(f'Model OUTPUT: {classification_output} ' )

In [None]:
predictions = torch.nn.functional.softmax(classification_output.logits, dim=-1)
print(predictions)

# Complete Pipeline

In [None]:
import torch

from transformers import AutoModelForSequenceClassification

checkpoint = "bert-base-cased"
tokenizer  = transformers.AutoTokenizer.from_pretrained(checkpoint)
model      = AutoModelForSequenceClassification.from_pretrained(checkpoint)

raw_inputs = [
    "I love you so much",   # 5 Words
    "screw you",            # 2 Words
]
numeric_ids = tokenizer(raw_inputs , padding=True , return_tensors="pt")    # Numeric ids => as PYTORCH TENSORS

outputs = model(**numeric_ids)

print(outputs.logits)
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
print(predictions)

print(model.config.id2label)