<a href="https://colab.research.google.com/github/ingabLee/Transformers_book/blob/main/TransformerLearning_Chap43_49.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# load model, tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=3)

In [None]:
# Class index -> sentiment label used when printing predictions.
dic = dict(enumerate(('positive', 'neutral', 'negative')))

# Sentences to classify.
eval_list = [
    "I like apple",
    "I like pear",
    "I go to school",
    "I dislike mosquito",
    "I felt very sad",
    "I feel so good",
]

# Ground-truth class index for each sentence, in the same order as eval_list.
ans = torch.tensor([0, 0, 1, 2, 2, 0])

# Baseline: classify every sentence before any fine-tuning has been done.
model.eval()  # evaluation mode; nothing is trained in this cell

# Gradients are not needed for inference.
with torch.no_grad():
  for sentence in eval_list:
    # Text -> tensor of token ids (a batch holding this single sentence).
    token_ids = tokenizer.encode(sentence, return_tensors="pt", padding=True, truncation=True)

    # Forward pass; the index of the largest logit is the predicted class,
    # and item() turns that one-element tensor into a plain Python int.
    label_id = model(token_ids).logits.argmax(-1).item()

    # Show the predicted label next to the input text.
    print(f"{dic[label_id]}:{sentence}")


In [None]:
# Fine-tune the classifier on the six labelled sentences (runtime: about 1 minute).
# AdamW is the optimizer that updates the weights (it is not an activation function).
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=1e-5)

# Switch to training mode.
model.train()

# Number of training passes and the loss recorded after each one.
epochs=50
losses = []

# The six sentences never change, so encode them once instead of on every epoch.
# Calling the tokenizer directly is the supported replacement for the deprecated
# batch_encode_plus(); it returns the same dict-like batch encoding.
inputs = tokenizer(eval_list, return_tensors="pt", padding=True, truncation=True)

# fine-tuning
for epoch in range(epochs):
  # Reset gradients left over from the previous step.
  optimizer.zero_grad()

  # Unpack the encoding's key/value pairs as keyword arguments (**kwargs) and
  # additionally pass the labels as a tensor, so the model returns the loss.
  outputs = model(**inputs, labels=ans)
  loss = outputs.loss

  # Back-propagate the error, then update the weights.
  loss.backward()
  optimizer.step()

  # Record a detached copy: appending `loss` itself would keep a reference to
  # every epoch's autograd history. The entry is still a tensor, so code that
  # calls .item() on the elements of `losses` keeps working.
  losses.append(loss.detach())

  # Print the scalar value rather than the tensor repr (which includes grad_fn).
  print(f"epoch:{epoch}, loss:{loss.item():.4f}")

In [None]:
# Turn each recorded loss into a plain Python float (the value is copied out of
# the tensor to the CPU). float() accepts one-element tensors as well as numbers.
new_losses = [float(loss_value) for loss_value in losses]

import matplotlib.pyplot as plt

# Plot the training curve with a title and axis labels so the figure can be read
# on its own; the trailing `;` suppresses the text repr of the last call.
fig, ax = plt.subplots()
ax.plot(new_losses)
ax.set(title="Fine-tuning loss", xlabel="epoch", ylabel="loss");

In [None]:
# Switch to evaluation mode for inference after fine-tuning.
model.eval()

# Predicted class index for each sentence, in eval_list order.
preds = []

# Gradients are not needed for inference.
with torch.no_grad():
  for article in eval_list:
    # Text -> tensor of token ids (a batch holding this single sentence).
    inputs = tokenizer.encode(article, return_tensors="pt",
                              padding=True, truncation=True)
    outputs = model(inputs)

    # The index of the largest logit is the predicted class.
    logits = outputs.logits
    pred = logits.argmax(-1).item()
    preds.append(pred)
    # Look the label up in `dic`, the index -> label mapping. The previous code
    # subscripted the builtin `dict` type, which never yields a label.
    print(f"{dic[pred]} : {article}")


In [None]:
# Convert the list of predicted class indices to a tensor.
# as_tensor keeps this cell safe to re-run: if `preds` is already a tensor it is
# returned as-is, whereas torch.tensor(<tensor>) warns about copy-constructing.
preds = torch.as_tensor(preds)
preds

In [None]:
# calc accuracy
# Count matching predictions as a plain Python int so the percentage is printed
# as a number instead of a tensor repr. as_tensor accepts `preds` whether it is
# still a list or has already been converted to a tensor.
correct = (ans == torch.as_tensor(preds)).sum().item()
accuracy = 100 * correct / len(ans)
print(f"Accuracy : {accuracy:.2f}%")