In [None]:
!pip install transformers
!pip install datasets

In [2]:
import ast

import numpy as np
import pandas as pd
import torch
import transformers
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

In [13]:
class ClassificationDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes CSV titles lazily, one row per access.

    The CSV is read once with ``pd.read_csv`` and must provide a ``title``
    column (the text handed to the tokenizer) and a ``label`` column. Each
    label cell is parsed as a Python literal and reduced to a class index with
    ``np.argmax``.
    NOTE(review): this presumably means labels are stringified one-hot lists
    such as ``"[0, 0, 1]"`` -- confirm against the data files.

    Args:
        data_path: CSV location, forwarded unchanged to ``pd.read_csv``.
        tokneizer: Callable tokenizer invoked as
            ``tokenizer(text, padding=..., max_length=..., truncation=True)``
            that returns a mapping of integer sequences. The misspelled
            parameter name is kept so existing keyword callers keep working.
        max_length: Length every title is padded/truncated to. Defaults to
            32, the value that used to be hard-coded.
    """

    def __init__(self, data_path, tokneizer, max_length=32):
        self.dataset = pd.read_csv(data_path)
        self.tokenizer = tokneizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the tokenized title and class index for row ``idx``.

        Returns:
            A dict with one ``torch.LongTensor`` per tokenizer output key,
            plus ``'labels'`` holding the argmax of the parsed label as a
            0-dim tensor.

        Raises:
            ValueError: If the label cell is not a valid Python literal.
        """
        # Fetch the row once instead of running a positional lookup per column.
        row = self.dataset.iloc[idx]
        title_encoding = self.tokenizer(
            row['title'],
            padding="max_length",
            max_length=self.max_length,
            truncation=True,
        )

        # literal_eval only accepts literals (lists, numbers, ...), so text in
        # the CSV can never be executed as code the way it could with eval().
        try:
            label_values = ast.literal_eval(row['label'])
        except (ValueError, SyntaxError) as exc:
            raise ValueError(
                f"Row {idx}: label {row['label']!r} is not a valid Python literal"
            ) from exc

        result = {key: torch.LongTensor(val) for key, val in title_encoding.items()}
        result['labels'] = torch.tensor(np.argmax(label_values))

        return result

In [26]:
# Device selection: use the first CUDA GPU when one is available, else the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Tunable settings gathered in one place so they are easy to find and change.
MODEL_NAME = 'klue/bert-base'
NUM_LABELS = 10
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 4
LEARNING_RATE = 1e-4
# NOTE(review): no random seed is set anywhere in this notebook, so runs are
# not reproducible -- consider calling torch.manual_seed before loading the model.

# Load the tokenizer and the pretrained model configured for NUM_LABELS classes,
# then move the model's parameters to the selected device.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=NUM_LABELS)
model.to(device)

# Build the train/validation datasets from their CSV files.
train_dataset = ClassificationDataset('/content/train.csv', tokenizer)
valid_dataset = ClassificationDataset('/content/valid.csv', tokenizer)

# Training batches are reshuffled every epoch. Validation is not shuffled:
# the loss/accuracy totals do not depend on sample order, and a fixed order
# keeps evaluation deterministic.
train_dataloader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=VALID_BATCH_SIZE, shuffle=False)

# Optimizer over all model parameters.
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

cuda:0


Some weights of the model checkpoint at klue/bert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized

In [27]:
# Training
NUM_EPOCHS = 10
CHECKPOINT_PATH = "./pytorch_model.pt"

for epoch in range(NUM_EPOCHS):
    print(f'Epoch {epoch+1}')
    model.train()
    train_loss = 0
    for batch in tqdm(train_dataloader):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Passing labels makes the model return the loss alongside the logits.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        train_loss += loss.item()

        loss.backward()
        optimizer.step()

    # Report the mean per-batch training loss for this epoch.
    train_loss /= len(train_dataloader)
    print(f'Train Loss: {train_loss:.4f}')

    # Validation
    model.eval()
    eval_loss = 0
    num_correct = 0
    num_total = 0
    with torch.no_grad():  # no gradients are needed while evaluating
        for batch in tqdm(valid_dataloader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            eval_loss += loss.item()

            # Predicted class = index of the largest logit for each sample.
            preds = torch.argmax(outputs.logits, dim=1)
            num_correct += (preds == labels).sum().item()
            num_total += len(labels)

    accuracy = num_correct / num_total
    eval_loss /= len(valid_dataloader)
    print(f'Validation Loss: {eval_loss:.4f}, Accuracy: {accuracy:.4f}')

    # Write the weights at the end of every epoch, not only after the last
    # one, so that interrupting a long run still leaves the most recently
    # completed epoch on disk. After a full run the file holds the same
    # final-epoch weights as before.
    torch.save(model.state_dict(), CHECKPOINT_PATH)

Epoch 1


  0%|          | 0/3779 [00:00<?, ?it/s]

Train Loss: 1.1258


  0%|          | 0/1528 [00:00<?, ?it/s]

Validation Loss: 0.8760, Accuracy: 0.6911
Epoch 2


  0%|          | 0/3779 [00:00<?, ?it/s]

Train Loss: 1.0264


  0%|          | 0/1528 [00:00<?, ?it/s]

Validation Loss: 0.8390, Accuracy: 0.6993
Epoch 3


  0%|          | 0/3779 [00:00<?, ?it/s]

Train Loss: 0.9576


  0%|          | 0/1528 [00:00<?, ?it/s]

Validation Loss: 0.8523, Accuracy: 0.7052
Epoch 4


  0%|          | 0/3779 [00:00<?, ?it/s]

KeyboardInterrupt: ignored