In [None]:
import torch
from tqdm.auto import tqdm
from torch.utils.data import DataLoader
from transformers import (
    Trainer,
    TrainingArguments,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    AdamW
)

def tokenize_function(examples):
    """Tokenize the ``title`` field of ``examples`` with the module-level ``tokenizer``.

    The tokenizer is called with ``padding="max_length"`` and ``truncation=True``
    and its result is returned unchanged.

    Illustrative record after this function has been mapped over a dataset
    (values are made up)::

        {
            'title': '<Korean news headline>',
            'label': 1,
            'input_ids': [2, 1037, 4123, 102, ...],   # actual token ids
            'attention_mask': [1, 1, 1, 1, 0, 0, 0],  # padding mask
        }
    """
    titles = examples["title"]
    encoded = tokenizer(titles, truncation=True, padding="max_length")
    return encoded

# Choose the compute device in priority order: NPU (only when this torch build
# exposes `torch.npu` and reports it available), then CUDA, then CPU.
device = torch.device(
    "npu" if hasattr(torch, 'npu') and torch.npu.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

# Load the pretrained checkpoint with a classification head sized to the number
# of label names in `train_dataset`, plus its matching tokenizer.
# NOTE(review): `train_dataset` is not defined in the visible cells; it must
# already exist in the kernel before this cell runs.
model_id = "klue/roberta-base"
model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    num_labels=len(train_dataset.features['label'].names),
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model.to(device)    # Trainer would do this internally

In [None]:
def make_dataloader(dataset, batch_size, shuffle=True):
    """Tokenize ``dataset`` and wrap it in a ``DataLoader``.

    Args:
        dataset: Object supporting ``map``, ``with_format``, ``rename_column`` and
            ``remove_columns``, with ``title`` and ``label`` columns.
        batch_size: Forwarded to ``DataLoader``.
        shuffle: Forwarded to ``DataLoader`` (defaults to ``True``).

    Returns:
        A ``DataLoader`` over the tokenized dataset, whose ``label`` column has
        been renamed to ``labels`` and whose ``title`` column has been dropped.
    """
    tokenized = dataset.map(tokenize_function, batched=True)
    prepared = (
        tokenized
        .with_format("torch")
        # Trainer would perform the following two steps internally.
        .rename_column("label", "labels")
        .remove_columns(column_names=['title'])
    )
    return DataLoader(prepared, batch_size=batch_size, shuffle=shuffle)

# Build one loader per split. Only the training split is shuffled; validation
# and test keep their original order.
# NOTE(review): `train_dataset`, `valid_dataset` and `test_dataset` are not
# defined in the visible cells; they must already exist in the kernel.
batch_size = 8
train_dataloader = make_dataloader(train_dataset, batch_size=batch_size, shuffle=True)
valid_dataloader = make_dataloader(valid_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = make_dataloader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
import numpy as np

def train_epoch(model, data_loader, optimizer):
    """Run one training pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Called as ``model(input_ids, attention_mask=..., labels=...)``;
            the returned object must expose ``.loss``.
        data_loader: Iterable of batches indexable by ``'input_ids'``,
            ``'attention_mask'`` and ``'labels'``; must support ``len()``.
        optimizer: Object providing ``zero_grad()`` and ``step()``.

    Returns:
        The sum of per-batch ``loss.item()`` values divided by
        ``len(data_loader)``.

    Note:
        Batch tensors are moved to the module-level ``device``.
    """
    model.train()
    running_loss = 0

    for batch in tqdm(data_loader):
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        on_device = {
            key: batch[key].to(device)
            for key in ('input_ids', 'attention_mask', 'labels')
        }
        outputs = model(
            on_device['input_ids'],
            attention_mask=on_device['attention_mask'],
            labels=on_device['labels'],
        )
        batch_loss = outputs.loss

        # Backpropagation: afterwards every parameter holds d(loss)/d(param).
        # E.g. a gradient of 0.1 means that nudging that weight up by a small
        # amount d raises the loss by roughly 0.1 * d.
        batch_loss.backward()

        # Parameter update. Conceptually (plain gradient descent), with a
        # learning rate of 0.01 and a gradient of 0.1:
        #     param = param - (0.01 * 0.1)
        # i.e. parameters move a little in the direction that lowers the loss.
        # The exact rule depends on the optimizer that was passed in.
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(data_loader)

def evaluate(model, data_loader):
    """Evaluate ``model`` on ``data_loader`` without tracking gradients.

    Args:
        model: Called as ``model(input_ids, attention_mask=..., labels=...)``;
            the returned object must expose ``.loss`` and ``.logits``.
        data_loader: Iterable of batches indexable by ``'input_ids'``,
            ``'attention_mask'`` and ``'labels'``; must support ``len()``.

    Returns:
        A ``(avg_loss, accuracy)`` tuple: the sum of per-batch ``loss.item()``
        values divided by ``len(data_loader)``, and the fraction of examples
        whose arg-max logit index equals the label.

    Note:
        Batch tensors are moved to the module-level ``device``.
    """
    model.eval()
    running_loss = 0
    predicted = []
    expected = []

    with torch.no_grad():
        for batch in tqdm(data_loader):
            on_device = {
                key: batch[key].to(device)
                for key in ('input_ids', 'attention_mask', 'labels')
            }
            targets = on_device['labels']

            outputs = model(
                on_device['input_ids'],
                attention_mask=on_device['attention_mask'],
                labels=targets,
            )
            running_loss += outputs.loss.item()

            # The predicted class is the index of the largest logit.
            batch_preds = outputs.logits.argmax(dim=-1)
            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(targets.cpu().numpy())

    avg_loss = running_loss / len(data_loader)
    hits = np.array(predicted) == np.array(expected)
    accuracy = np.mean(hits)

    return avg_loss, accuracy

In [None]:
# Fine-tuning driver: train for `num_epochs`, reporting the training loss and the
# validation loss/accuracy after every epoch, then report test accuracy once.
# NOTE(review): `AdamW` here is the name imported from `transformers` at the top
# of the notebook; that class may be deprecated in favor of `torch.optim.AdamW`
# in the installed transformers release — confirm.
num_epochs = 1
optimizer = AdamW(params=model.parameters(), lr=5e-5)

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss = train_epoch(
        model=model, data_loader=train_dataloader, optimizer=optimizer
    )
    print(f"Training loss: {train_loss}")

    valid_loss, valid_accuracy = evaluate(model=model, data_loader=valid_dataloader)
    print(f"Validation loss: {valid_loss}")
    print(f"Validation accuracy: {valid_accuracy}")

# Final held-out evaluation; only the accuracy is reported.
_, test_accuracy = evaluate(model=model, data_loader=test_dataloader)
print(f"Test accuracy: {test_accuracy}")