In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import wandb

from IPython.display import display

import torch
import torch.optim as optim
from torch.utils.data import Dataset
from transformers import AdamW, AutoConfig, AutoTokenizer
from electra_model import ElectraForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
sweep_config = {
    'name' : 'sweep',
    'method': 'grid',
    'metric' : {
        'name': 'test_acc',
        'goal': 'maximize'   
        },
    'parameters' : {
        'learning_rate': {
            'values': [1e-4, 3e-4, 1e-5, 3e-5, 1e-6, 5e-6]
            },
        'batch_size': {
            'values': [256,128,64,32,16]
            }
        }
    }

In [3]:
class TextClassificationDataset(Dataset):
  def __init__(self,
               file_path = "./data/wellness_text_classification.txt",
               num_label = 359,
               device = 'cuda',
               max_seq_len = 128,
               tokenizer = AutoTokenizer.from_pretrained("./pretrained_model")
               ):
    self.file_path = file_path
    self.device = device
    self.data = []
    self.tokenizer = tokenizer
    file = open(self.file_path, 'r')

    while True:
      line = file.readline()
      if not line:
        break
      datas = line.split("    ")
      index_of_words = self.tokenizer.encode(datas[0])
      token_type_ids = [0] * len(index_of_words)
      attention_mask = [1] * len(index_of_words)

      padding_length = max_seq_len - len(index_of_words)

      index_of_words += [0] * padding_length
      token_type_ids += [0] * padding_length
      attention_mask += [0] * padding_length

      label = int(datas[1][:-1])

      data = {
              'input_ids': torch.tensor(index_of_words).to(self.device),
              'token_type_ids': torch.tensor(token_type_ids).to(self.device),
              'attention_mask': torch.tensor(attention_mask).to(self.device),
              'labels': torch.tensor(label).to(self.device)
             }

      self.data.append(data)

    file.close()

  def __len__(self):
    return len(self.data)
  def __getitem__(self,index):
    item = self.data[index]
    return item

In [4]:
class EarlyStopping:
    """주어진 patience 이후로 validation loss가 개선되지 않으면 학습을 조기 중지"""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt'):
        """
        Args:
            patience (int): validation loss가 개선된 후 기다리는 기간
                            Default: 7
            verbose (bool): True일 경우 각 validation loss의 개선 사항 메세지 출력
                            Default: False
            delta (float): 개선되었다고 인정되는 monitered quantity의 최소 변화
                            Default: 0
            path (str): checkpoint저장 경로
                            Default: 'checkpoint.pt'
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            #self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            #print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            #self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''validation loss가 감소하면 모델을 저장한다.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [5]:
def train(epochs ,model, optimizer, train_loader, test_loader, test_dataset, patience, bs, lr, wandb):
    loss = 0
    model.train()
    wandb.watch(model, loss, log="all", log_freq=10)
    losses = []
    early_stopping = EarlyStopping(patience = patience, verbose = True, delta = 0.01, path=f"./finetuned_model/{bs}with{lr}.pth")
    for epoch in range(1, epochs+1):
        model.train()
        for _, data in enumerate(train_loader, 1):
            optimizer.zero_grad()
            inputs = {'input_ids': data['input_ids'],
                      'attention_mask': data['attention_mask'],
                      'labels': data['labels']
                      }
            outputs = model(**inputs)

            loss = outputs[0]
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            
        model.eval()
        test_loss = 0
        test_losses = []
        acc = 0
        for data in test_loader:
            with torch.no_grad():
                inputs = {'input_ids': data['input_ids'],
                          'attention_mask': data['attention_mask'],
                          'labels': data['labels']
                          }
                outputs = model(**inputs)
                test_loss = outputs[0]
                test_losses.append(test_loss.item())
                
                logit = outputs[1]
                softmax_logit = torch.softmax(logit, dim=-1)
                softmax_logit = softmax_logit.squeeze()
                max_index = torch.argmax(softmax_logit).item()
                acc += (max_index == inputs['labels']).item()
        wandb.log({
            "train_loss": np.mean(losses), "test_loss": np.mean(test_losses),"test_acc": acc / len(test_dataset) * 100}, step=epoch)
        early_stopping(np.mean(test_losses), model)
        if early_stopping.early_stop:
            break

In [6]:
train_data = f"./data/wellness_text_classification_train.txt"
test_data = f"./data/wellness_text_classification_test.txt"
finetuned_model = f"./finetuned_model/psychological_counseling_model.pth"
pretrained_model = "./pretrained_model"

epochs = 200
patience = 30
device = torch.device("cuda")
tokenizer = AutoTokenizer.from_pretrained(pretrained_model)

train_dataset = TextClassificationDataset(file_path=train_data, tokenizer=tokenizer, device=device)
test_dataset = TextClassificationDataset(file_path=test_data, tokenizer=tokenizer, device=device)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)
electra_config = AutoConfig.from_pretrained(pretrained_model)
model = ElectraForSequenceClassification.from_pretrained(pretrained_model_name_or_path=pretrained_model,
                                                            config=electra_config,
                                                            num_labels=359)
model.to(device)

no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
      'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

Some weights of the model checkpoint at ./pretrained_model were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at ./pretrained_model and are newly initialized: ['classifier.dense.weight', 'classifier.out_

In [7]:
def run_sweep(config=None):
    wandb.init(config=config)
    w_config = wandb.config
    batch_size = w_config.batch_size
    learning_rate = w_config.learning_rate
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
    train(epochs ,model, optimizer, train_loader, test_loader, test_dataset, patience, batch_size, learning_rate, wandb)

In [8]:
sweep_id = wandb.sweep(sweep_config, project="sweep", entity='tkwk6428')
wandb.agent(sweep_id, run_sweep)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: iz8kk105
Sweep URL: https://wandb.ai/tkwk6428/sweep/sweeps/iz8kk105


[34m[1mwandb[0m: Agent Starting Run: xukz7uzz with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtkwk6428[0m. Use [1m`wandb login --relogin`[0m to force relogin




0,1
test_acc,▂▁▃▄▄▅▆▆▆▆▆▇▇█▇▇▇▇██▇█▇▇██▇▇▇▇▇██▇███
test_loss,▆█▄▃▃▂▁▁▂▃▂▂▃▂▃▃▄▄▅▅▆▅▅▆▆▆▆▇▇▇▇▇▇▇▄▅▅
train_loss,██▇▇▇▆▆▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
test_acc,47.07113
test_loss,4.70054
train_loss,1.17689


[34m[1mwandb[0m: Agent Starting Run: 74bgvtxp with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	learning_rate: 3e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
test_acc,▂▁▂▃▃▃██▇▇▆█▆█▄▅▄▁▁▅▃▃▆▅▁▃▁▁▁▂▁▃▄▁▁
test_loss,▄▆▆▄▁▄▃▃▅▆▆▅▅▃▄▅▅▅▆▅▆▆▇▆▇█▇▇▆▅▆▆▆▅▆
train_loss,▁▁▁▃█▇▆▅▄▄▃▃▂▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
test_acc,43.72385
test_loss,5.2876
train_loss,0.13857


[34m[1mwandb[0m: Agent Starting Run: hs5cm792 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	learning_rate: 5e-06
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
test_acc,▆▁▇▅▅▅▆▄▃▄▆▅▁▆▂▅▂▂▃▄▅▄▄▃▃▁▂█▆▄▅▇▅▆▃▅▄▂▃▄
test_loss,█▆▄▆▆▆▅▆▄▅▅▅▇▄▃▁▃▄▂▄▄▆▅▄▇▇▃▃▅▄▄▄▅▃▇▇▇▇▄█
train_loss,▁▃▃▄▇█▇▄▃▃▄▄▃▄▇▇▆▆▅▅▅▅▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃

0,1
test_acc,43.93305
test_loss,5.48517
train_loss,0.09076


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
