In [1]:
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader

tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'openai-gpt')

Using cache found in C:\Users\ssungz/.cache\torch\hub\huggingface_pytorch-transformers_main


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/816k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/458k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.27M [00:00<?, ?B/s]



In [2]:
torch.__version__
torch.cuda.is_available()

True

In [3]:
ds = load_dataset("fancyzhx/ag_news")
tokenizer.pad_token = tokenizer.unk_token

def collate_fn(batch):
  max_len = 400
  texts, labels = [], []
  for row in batch:
    labels.append(row['label'])
    texts.append(row['text'])

  texts = torch.LongTensor(tokenizer(texts, padding='max_length', truncation=True, max_length=max_len).input_ids)
  labels = torch.LongTensor(labels)

  return texts, labels


train_loader = DataLoader(
    ds['train'], batch_size=64, shuffle=True, collate_fn=collate_fn
)
test_loader = DataLoader(
    ds['test'], batch_size=64, shuffle=False, collate_fn=collate_fn
)

unique_labels = set()
for split in ['train']:
    for item in ds[split]:
        unique_labels.add(item['label'])

num_classes = len(unique_labels)
print(num_classes)


4


In [4]:
print(ds['train'])

Dataset({
    features: ['text', 'label'],
    num_rows: 120000
})


In [5]:
model = torch.hub.load('huggingface/pytorch-transformers', 'model', 'openai-gpt')
model

Using cache found in C:\Users\ssungz/.cache\torch\hub\huggingface_pytorch-transformers_main


OpenAIGPTModel(
  (tokens_embed): Embedding(40478, 768)
  (positions_embed): Embedding(512, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0-11): 12 x Block(
      (attn): Attention(
        (c_attn): Conv1D(nf=2304, nx=768)
        (c_proj): Conv1D(nf=768, nx=768)
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): MLP(
        (c_fc): Conv1D(nf=3072, nx=768)
        (c_proj): Conv1D(nf=768, nx=3072)
        (act): NewGELUActivation()
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
  )
)

In [6]:
from torch import nn


class TextClassifier(nn.Module):
  def __init__(self):
    super().__init__()

    self.encoder = torch.hub.load('huggingface/pytorch-transformers', 'model', 'openai-gpt')
    self.classifier = nn.Linear(768, num_classes)

  def forward(self, x):
    x = self.encoder(x)['last_hidden_state']
    x = self.classifier(x[:, -1])

    return x


model = TextClassifier()
model

Using cache found in C:\Users\ssungz/.cache\torch\hub\huggingface_pytorch-transformers_main


TextClassifier(
  (encoder): OpenAIGPTModel(
    (tokens_embed): Embedding(40478, 768)
    (positions_embed): Embedding(512, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x Block(
        (attn): Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      )
    )
  )
  (classifier): Linear(in_features=768, out_features=4, bias=True)
)

In [None]:
## 질문사항 (평가 시 이 부분에 대해 코멘트가 가능하다면 부탁드립니다.) ##

In [None]:
## 아래 코드로 학습 시 accuracy 값이 현저 히 낮은데 
##
from torch import nn
import torch.nn.functional as F

class TextClassifier(nn.Module):
  def __init__(self, num_classes, hidden_size=256, dropout_rate=0.5):
    super().__init__()

    self.encoder = torch.hub.load('huggingface/pytorch-transformers', 'model', 'openai-gpt')
    self.dropout = nn.Dropout(dropout_rate)
    self.bn = nn.BatchNorm1d(hidden_size)
    self.intermediate = nn.Linear(768, hidden_size)
    self.classifier = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    x = self.encoder(x)['last_hidden_state']
    x = x[:, -1]
    x = self.dropout(x)
    x = self.intermediate(x)
    x = self.bn(x)
    x = F.relu(x)
    x = self.classifier(x)

    return x


model = TextClassifier(num_classes)
model

In [7]:
for param in model.encoder.parameters():
  param.requires_grad = False

In [8]:
from torch.optim import Adam
import numpy as np
import matplotlib.pyplot as plt


lr = 0.0001
model = model.to('cuda')
loss_fn = nn.CrossEntropyLoss()

optimizer = Adam(model.parameters(), lr=lr)
n_epochs = 10

for epoch in range(n_epochs):
  total_loss = 0.
  model.train()
  for data in train_loader:
    model.zero_grad()
    inputs, labels = data
    inputs, labels = inputs.to('cuda'), labels.to('cuda')

    preds = model(inputs)
    loss = loss_fn(preds, labels)
    loss.backward()
    optimizer.step()

    total_loss += loss.item()

  print(f"Epoch {epoch:3d} | Train Loss: {total_loss}")

Epoch   0 | Train Loss: 2356.035525918007
Epoch   1 | Train Loss: 2039.858066380024
Epoch   2 | Train Loss: 1880.4663382172585
Epoch   3 | Train Loss: 1784.542182803154
Epoch   4 | Train Loss: 1723.501572072506
Epoch   5 | Train Loss: 1674.478139281273
Epoch   6 | Train Loss: 1648.0704543590546
Epoch   7 | Train Loss: 1621.1899408102036
Epoch   8 | Train Loss: 1602.7535386681557
Epoch   9 | Train Loss: 1584.0639610290527


In [9]:
def accuracy(model, dataloader):
  cnt = 0
  acc = 0

  for data in dataloader:
    inputs, labels = data
    inputs, labels = inputs.to('cuda'), labels.to('cuda')

    preds = model(inputs)
    preds = torch.argmax(preds, dim=-1)
    # preds = (preds > 0).long()[..., 0]

    cnt += labels.shape[0]
    acc += (labels == preds).sum().item()

  return acc / cnt


with torch.no_grad():
  model.eval()
  train_acc = accuracy(model, train_loader)
  test_acc = accuracy(model, test_loader)
  print(f"=========> Train acc: {train_acc:.3f} | Test acc: {test_acc:.3f}")

