<a href="https://colab.research.google.com/github/jason9693/APEACH/blob/master/apeach.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Optional setup: uncomment the two lines below to install the libraries this
# notebook imports (transformers, datasets) if they are not already available.
# !pip install transformers
# !pip install datasets

In [None]:
# Hub identifiers shared by the rest of the notebook.
# dataset_name is handed to load_dataset(); model_name is handed to the
# tokenizer and model from_pretrained() calls.
dataset_name = 'jason9693/APEACH'
model_name = 'monologg/koelectra-small-v3-discriminator'

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Seed torch's global RNG before anything random happens: the sequence
# classification head is newly initialised (it is not part of the checkpoint),
# so without a fixed seed every "Restart & Run All" starts from different weights.
torch.manual_seed(42)

# Tokenizer and 2-class classifier built from the same pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Trailing ';' keeps the notebook from echoing the full module repr as cell output.
model.to(device);

Some weights of the model checkpoint at monologg/koelectra-small-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-small-v3-discriminator and are newly initialized

ElectraForSequenceClassification(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(35000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (embeddings_project): Linear(in_features=128, out_features=256, bias=True)
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=256, out_features=256, bias=True)
              (key): Linear(in_features=256, out_features=256, bias=True)
              (value): Linear(in_features=256, out_features=256, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_

In [None]:
from datasets import load_dataset

# Fetch every split of the dataset identified by dataset_name; later cells
# index the result with "train" and "test".
dataset = load_dataset(path=dataset_name)

Downloading:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

Using custom data configuration default-1448d8c4b3f7cc27


Downloading and preparing dataset csv/APEACH (download: 1.15 MiB, generated: 1.21 MiB, post-processed: Unknown size, total: 2.36 MiB) to /root/.cache/huggingface/datasets/csv/default-1448d8c4b3f7cc27/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/787k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/344k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-1448d8c4b3f7cc27/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Training hyper-parameters. Each "#@param" annotation must stay on the same
# line as its assignment (presumably Colab form fields -- verify before moving).
# batch_size: number of examples per batch for both DataLoaders.
batch_size =  32#@param {type:"integer"}
# learning_rate: learning rate passed to the AdamW optimizer.
learning_rate =  5e-5#@param {type:"number"}
# num_epochs: number of full passes over the training DataLoader.
num_epochs = 3 #@param {type:"slider", min:0, max:100, step:1}
# NOTE: the value chosen here is not the one used for training -- it is
# overwritten with num_epochs * len(train_dataloader) when the learning-rate
# scheduler is created further down.
num_training_steps = 5000 #@param {type:"slider", min:0, max:50000, step:1}

In [None]:
from torch.utils.data import DataLoader

# Training batches are reshuffled every epoch; evaluation batches keep the
# dataset order (shuffle=False is the DataLoader default, spelled out here).
train_dataloader = DataLoader(
    dataset["train"],
    batch_size=batch_size,
    shuffle=True,
)
eval_dataloader = DataLoader(
    dataset["test"],
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
from torch.optim import AdamW
from transformers import get_scheduler

# Optimise every model parameter with AdamW at the configured learning rate.
optimizer = AdamW(params=model.parameters(), lr=learning_rate)

# One scheduler step is taken per batch, so the schedule length is the number
# of batches per epoch multiplied by the number of epochs.
num_training_steps = len(train_dataloader) * num_epochs

# Linear schedule without warm-up, spanning the whole training run.
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

## Train

In [None]:
from tqdm.auto import tqdm

# One tick per optimisation step across all epochs.
progress_bar = tqdm(range(num_training_steps))

model.train()
for epoch in range(num_epochs):
    for batch in train_dataloader:
        # Tokenize the raw strings of this batch. Truncating to the model's
        # position-embedding size keeps an over-long text from indexing past
        # the embedding table and crashing the forward pass.
        inputs = tokenizer(
            batch['text'],
            padding=True,
            truncation=True,
            max_length=model.config.max_position_embeddings,
            return_tensors='pt',
        )
        # as_tensor accepts both a list and an already-collated tensor without
        # the copy (and the UserWarning) that torch.tensor(tensor) triggers.
        inputs['labels'] = torch.as_tensor(batch['class'])
        # Move every input tensor to the same device as the model.
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Forward pass; passing labels makes the model return the loss.
        outputs = model(**inputs)
        loss = outputs.loss
        loss.backward()

        # Parameter update, learning-rate decay, then clear gradients for the next batch.
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)

  0%|          | 0/741 [00:00<?, ?it/s]

  if __name__ == '__main__':


In [None]:
import numpy as np
from datasets import load_metric

# NOTE(review): load_metric is reported as deprecated in newer `datasets`
# releases in favour of the separate `evaluate` package -- confirm against the
# installed version before upgrading.
metrics = [
    load_metric("accuracy"),
    load_metric("f1"),
    load_metric("precision"),
    load_metric("recall"),
]

model.eval()
for batch in eval_dataloader:
    # Same preprocessing as in training; truncation keeps an over-long test
    # text within the model's position-embedding size.
    inputs = tokenizer(
        batch['text'],
        padding=True,
        truncation=True,
        max_length=model.config.max_position_embeddings,
        return_tensors='pt',
    )
    # as_tensor avoids re-copying labels the DataLoader already collated.
    inputs['labels'] = torch.as_tensor(batch['class'])
    # Move every input tensor to the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # No gradients are needed for evaluation.
    with torch.no_grad():
        outputs = model(**inputs)

    # Predicted class = index of the largest logit.
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)

    # Accumulate this batch into every metric.
    for metric in metrics:
        metric.add_batch(predictions=predictions, references=inputs["labels"])

# Report each metric over the whole evaluation split.
for metric in metrics:
    print(metric.compute())

Downloading builder script:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.21k [00:00<?, ?B/s]

  


{'accuracy': 0.7323607427055703}
{'f1': 0.7829640782964077}
{'precision': 0.6674000733406674}
{'recall': 0.9469302809573361}
