In [1]:
import optuna
import pandas as pd
import os
import re
import json
import yaml
from glob import glob
from tqdm import tqdm
from pprint import pprint
import torch
import pytorch_lightning as pl
from rouge import Rouge  # 모델의 성능을 평가하기 위한 라이브러리입니다.

from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, BartForConditionalGeneration, BartConfig
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import Trainer, TrainingArguments
from transformers import EarlyStoppingCallback

import wandb

In [2]:
tokenizer = AutoTokenizer.from_pretrained("EbanLee/kobart-summary-v3")

In [3]:
config_data = {
    "general": {
        "data_path": "../data/", 
        "model_name": "EbanLee/kobart-summary-v3",
        "output_dir": "./"  
    },
    "tokenizer": {
        "encoder_max_len": 512,
        "decoder_max_len": 100,
        "bos_token": f"{tokenizer.bos_token}",
        "eos_token": f"{tokenizer.eos_token}",
        "special_tokens": ['#Person1#', '#Person2#', '#Person3#', '#PhoneNumber#', '#Address#', '#PassportNumber#']
    },
    "training": {
        "overwrite_output_dir": True,
        "num_train_epochs": 20,
        "learning_rate": 1e-5,
        "per_device_train_batch_size": 50,
        "per_device_eval_batch_size": 32,
        "warmup_ratio": 0.1,
        "weight_decay": 0.01,
        "lr_scheduler_type": 'cosine',
        "optim": 'adamw_torch',
        "gradient_accumulation_steps": 1,
        "evaluation_strategy": 'epoch',
        "save_strategy": 'epoch',
        "save_total_limit": 5,
        "fp16": False,
        "load_best_model_at_end": True,
        "seed": 42,
        "logging_dir": "./logs",
        "logging_strategy": "epoch",
        "predict_with_generate": True,
        "generation_max_length": 100,
        "do_train": True,
        "do_eval": True,
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.001,
        "report_to": "wandb"
    },
    "wandb": {
        "entity": "legendki",
        "project": "NLP-Summarization",
        "name": "KoBART-summary-v3-optuna",
    },
    "inference": {
        "ckt_path": "model ckt path",
        "result_path": "./prediction/",
        "no_repeat_ngram_size": 2,
        "early_stopping": True,
        "generate_max_length": 100,
        "num_beams": 4,
        "batch_size": 32,
        "remove_tokens": ['<usr>', f"{tokenizer.bos_token}", f"{tokenizer.eos_token}", f"{tokenizer.pad_token}"]
    }
}

In [4]:
def compute_metrics(config, tokenizer, pred):
    rouge = Rouge()
    predictions = pred.predictions
    labels = pred.label_ids

    predictions[predictions == -100] = tokenizer.pad_token_id
    labels[labels == -100] = tokenizer.pad_token_id

    decoded_preds = tokenizer.batch_decode(predictions, clean_up_tokenization_spaces=True)
    labels = tokenizer.batch_decode(labels, clean_up_tokenization_spaces=True)

    replaced_predictions = decoded_preds.copy()
    replaced_labels = labels.copy()
    remove_tokens = config['inference']['remove_tokens']
    for token in remove_tokens:
        replaced_predictions = [sentence.replace(token, " ") for sentence in replaced_predictions]
        replaced_labels = [sentence.replace(token, " ") for sentence in replaced_labels]

    results = rouge.get_scores(replaced_predictions, replaced_labels, avg=True)

    result = {
        'rouge1': results['rouge-1']['f'],
        'rouge2': results['rouge-2']['f'],
        'rougeL': results['rouge-l']['f'],
    }
    return result


In [5]:
def load_trainer_for_train(config, generate_model, tokenizer, train_inputs_dataset, val_inputs_dataset):
    print('-'*10, 'Make training arguments', '-'*10,)
    training_args = Seq2SeqTrainingArguments(
        output_dir=config['general']['output_dir'],  
        overwrite_output_dir=config['training']['overwrite_output_dir'],
        num_train_epochs=config['training']['num_train_epochs'],
        learning_rate=config['training']['learning_rate'],
        per_device_train_batch_size=config['training']['per_device_train_batch_size'], 
        per_device_eval_batch_size=config['training']['per_device_eval_batch_size'], 
        warmup_ratio=config['training']['warmup_ratio'], 
        weight_decay=config['training']['weight_decay'], 
        lr_scheduler_type=config['training']['lr_scheduler_type'],
        optim=config['training']['optim'],
        gradient_accumulation_steps=config['training']['gradient_accumulation_steps'],
        evaluation_strategy=config['training']['evaluation_strategy'], 
        save_strategy=config['training']['save_strategy'],
        save_total_limit=config['training']['save_total_limit'], 
        fp16=config['training']['fp16'],
        load_best_model_at_end=config['training']['load_best_model_at_end'], 
        seed=config['training']['seed'],
        logging_dir=config['training']['logging_dir'], 
        logging_strategy=config['training']['logging_strategy'],
        predict_with_generate=config['training']['predict_with_generate'],
        generation_max_length=config['training']['generation_max_length'],
        do_train=config['training']['do_train'],
        do_eval=config['training']['do_eval'],
        report_to=config['training']['report_to']
    )

    wandb.init(
        entity=config['wandb']['entity'],
        project=config['wandb']['project'],
        name=config['wandb']['name'],
    )

    os.environ["WANDB_LOG_MODEL"] = "true"
    os.environ["WANDB_WATCH"] = "false"

    MyCallback = EarlyStoppingCallback(
        early_stopping_patience=config['training']['early_stopping_patience'],
        early_stopping_threshold=config['training']['early_stopping_threshold']
    )
    print('-'*10, 'Make training arguments complete', '-'*10,)
    print('-'*10, 'Make trainer', '-'*10,)

    trainer = Seq2SeqTrainer(
        model=generate_model,  
        args=training_args,
        train_dataset=train_inputs_dataset,
        eval_dataset=val_inputs_dataset,
        compute_metrics=lambda pred: compute_metrics(config, tokenizer, pred),
        callbacks=[MyCallback]
    )
    print('-'*10, 'Make trainer complete', '-'*10,)

    return trainer

In [6]:
def load_tokenizer_and_model_for_train(config, device):
    print('-'*10, 'Load tokenizer & model', '-'*10,)
    print('-'*10, f'Model Name : {config["general"]["model_name"]}', '-'*10,)
    model_name = config['general']['model_name']
    bart_config = BartConfig().from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    generate_model = BartForConditionalGeneration.from_pretrained(config['general']['model_name'], config=bart_config)

    special_tokens_dict = {'additional_special_tokens': config['tokenizer']['special_tokens']}
    tokenizer.add_special_tokens(special_tokens_dict)

    generate_model.resize_token_embeddings(len(tokenizer))  
    generate_model.to(device)
    print(generate_model.config)

    print('-'*10, 'Load tokenizer & model complete', '-'*10,)
    return generate_model, tokenizer

In [7]:
def load_data(config):
    data_path = config['general']['data_path']
    train_df = pd.read_csv(os.path.join(data_path, 'train.csv'))
    val_df = pd.read_csv(os.path.join(data_path, 'dev.csv'))
    return train_df, val_df

In [8]:
class Preprocess:
    def __init__(self, bos_token: str, eos_token: str):
        self.bos_token = bos_token
        self.eos_token = eos_token

    @staticmethod
    def make_set_as_df(file_path, is_train=True):
        df = pd.read_csv(file_path)
        if is_train:
            return df[['fname', 'dialogue', 'summary']]
        else:
            return df[['fname', 'dialogue']]

    def make_input(self, dataset, is_test=False):
        if is_test:
            encoder_input = dataset['dialogue']
            decoder_input = [self.bos_token] * len(dataset['dialogue'])
            return encoder_input.tolist(), list(decoder_input)
        else:
            encoder_input = dataset['dialogue']
            decoder_input = dataset['summary'].apply(lambda x: self.bos_token + str(x))
            decoder_output = dataset['summary'].apply(lambda x: str(x) + self.eos_token)
            return encoder_input.tolist(), decoder_input.tolist(), decoder_output.tolist()


In [9]:
def prepare_train_dataset(config, preprocessor, data_path, tokenizer):
    train_file_path = os.path.join(data_path, 'train.csv')
    val_file_path = os.path.join(data_path, 'dev.csv')

    train_data = preprocessor.make_set_as_df(train_file_path)
    val_data = preprocessor.make_set_as_df(val_file_path)

    print('-'*150)
    print(f'train_data:\n {train_data["dialogue"][0]}')
    print(f'train_label:\n {train_data["summary"][0]}')

    print('-'*150)
    print(f'val_data:\n {val_data["dialogue"][0]}')
    print(f'val_label:\n {val_data["summary"][0]}')

    encoder_input_train, decoder_input_train, decoder_output_train = preprocessor.make_input(train_data)
    encoder_input_val, decoder_input_val, decoder_output_val = preprocessor.make_input(val_data)
    print('-'*10, 'Load data complete', '-'*10, )

    tokenized_encoder_inputs = tokenizer(encoder_input_train, return_tensors="pt", padding=True,
                                         add_special_tokens=True, truncation=True, max_length=config['tokenizer'][
            'encoder_max_len'], return_token_type_ids=False)
    tokenized_decoder_inputs = tokenizer(decoder_input_train, return_tensors="pt", padding=True,
                                         add_special_tokens=True, truncation=True, max_length=config['tokenizer'][
            'decoder_max_len'], return_token_type_ids=False)
    tokenized_decoder_outputs = tokenizer(decoder_output_train, return_tensors="pt", padding=True,
                                          add_special_tokens=True, truncation=True, max_length=config['tokenizer'][
            'decoder_max_len'], return_token_type_ids=False)

    train_inputs_dataset = DatasetForTrain(tokenized_encoder_inputs, tokenized_decoder_inputs, tokenized_decoder_outputs,
                                           len(encoder_input_train))

    val_tokenized_encoder_inputs = tokenizer(encoder_input_val, return_tensors="pt", padding=True,
                                             add_special_tokens=True, truncation=True, max_length=config['tokenizer'][
            'encoder_max_len'], return_token_type_ids=False)
    val_tokenized_decoder_inputs = tokenizer(decoder_input_val, return_tensors="pt", padding=True,
                                             add_special_tokens=True, truncation=True, max_length=config['tokenizer'][
            'decoder_max_len'], return_token_type_ids=False)
    val_tokenized_decoder_outputs = tokenizer(decoder_output_val, return_tensors="pt", padding=True,
                                              add_special_tokens=True, truncation=True, max_length=config['tokenizer'][
            'decoder_max_len'], return_token_type_ids=False)

    val_inputs_dataset = DatasetForVal(val_tokenized_encoder_inputs, val_tokenized_decoder_inputs,
                                       val_tokenized_decoder_outputs, len(encoder_input_val))

    print('-'*10, 'Make dataset complete', '-'*10, )
    return train_inputs_dataset, val_inputs_dataset


In [10]:
class DatasetForTrain(Dataset):
    def __init__(self, encoder_input, decoder_input, labels, len):
        self.encoder_input = encoder_input
        self.decoder_input = decoder_input
        self.labels = labels
        self.len = len

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encoder_input.items()}
        item2 = {key: val[idx].clone().detach() for key, val in self.decoder_input.items()}  
        item2['decoder_input_ids'] = item2['input_ids']
        item2['decoder_attention_mask'] = item2['attention_mask']
        item2.pop('input_ids')
        item2.pop('attention_mask')
        item.update(item2) 
        item['labels'] = self.labels['input_ids'][idx] 
        return item

    def __len__(self):
        return self.len

In [11]:
class DatasetForVal(Dataset):
    def __init__(self, encoder_input, decoder_input, labels, len):
        self.encoder_input = encoder_input
        self.decoder_input = decoder_input
        self.labels = labels
        self.len = len

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encoder_input.items()}  
        item2 = {key: val[idx].clone().detach() for key, val in self.decoder_input.items()} 
        item2['decoder_input_ids'] = item2['input_ids']
        item2['decoder_attention_mask'] = item2['attention_mask']
        item2.pop('input_ids')
        item2.pop('attention_mask')
        item.update(item2) 
        item['labels'] = self.labels['input_ids'][idx] 
        return item

    def __len__(self):
        return self.len

In [12]:
def objective(trial):
    config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)
    config_data['training']['per_device_train_batch_size'] = trial.suggest_categorical('per_device_train_batch_size', [4, 8, 16, 32])
    config_data['training']['num_train_epochs'] = trial.suggest_int('num_train_epochs', 10, 30)
    config_data['training']['warmup_ratio'] = trial.suggest_float('warmup_ratio', 0.0, 0.3)
    config_data['training']['optim'] = trial.suggest_categorical('optim', ['adamw_torch', 'adamw_hf', 'adafactor'])
    config_data['training']['gradient_accumulation_steps'] = trial.suggest_int('gradient_accumulation_steps', 1, 4)
    config_data['training']['lr_scheduler_type'] = trial.suggest_categorical('lr_scheduler_type', ['linear', 'cosine'])
    config_data['training']['fp16'] = trial.suggest_categorical('fp16', [False])

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f"Running on: {device}")

    generate_model, tokenizer = load_tokenizer_and_model_for_train(config_data, device)
    print("Model and Tokenizer Loaded.")

    preprocessor = Preprocess(config_data['tokenizer']['bos_token'], config_data['tokenizer']['eos_token'])
    train_inputs_dataset, val_inputs_dataset = prepare_train_dataset(config_data, preprocessor, config_data['general']['data_path'], tokenizer)

    trainer = load_trainer_for_train(config_data, generate_model, tokenizer, train_inputs_dataset, val_inputs_dataset)

    trainer.train()

    eval_metrics = trainer.evaluate(eval_dataset=val_inputs_dataset)
    rougeL = eval_metrics.get('rougeL', 0.0)

    return rougeL

In [13]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

print(f"Best trial: {study.best_trial.value}")
print(f"Best parameters: {study.best_trial.params}")

[I 2024-08-30 08:23:20,988] A new study created in memory with name: no-name-1fedfa8c-3f14-4c5b-9198-f8ddf62bed20
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------
BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": fals

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


---------- Make dataset complete ----------
---------- Make training arguments ----------


[34m[1mwandb[0m: Currently logged in as: [33mrmadyd0314[0m ([33mlegendki[0m). Use [1m`wandb login --relogin`[0m to force relogin


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
0,0.8415,0.594905,0.354056,0.118829,0.338108
1,0.57,0.552857,0.365507,0.131802,0.350015
3,0.4514,0.528809,0.376591,0.141621,0.362533
4,0.4016,0.529387,0.379413,0.146472,0.362652
6,0.3199,0.556059,0.375274,0.14184,0.35974


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 08:50:47,163] Trial 0 finished with value: 0.0 and parameters: {'learning_rate': 2.1169083140275866e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 26, 'warmup_ratio': 0.09577831393575928, 'optim': 'adamw_hf', 'gradient_accumulation_steps': 3, 'lr_scheduler_type': 'cosine', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "kobart_

VBox(children=(Label(value='472.661 MB of 472.661 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,█▄▁▁▁▂▄▁
eval/rouge1,▁▄▆▇██▇▇
eval/rouge2,▁▄▆▇█▇▇▇
eval/rougeL,▁▄▇███▇█
eval/runtime,▃▂▅▅▁█▄▂
eval/samples_per_second,▅▆▄▄█▁▅▆
eval/steps_per_second,▅▆▄▄█▁▅▆
train/epoch,▁▁▂▂▃▃▅▅▆▆▇▇████
train/global_step,▁▁▂▂▃▃▅▅▆▆▇▇████
train/learning_rate,▁▆███▇▇

0,1
eval/loss,0.52881
eval/rouge1,0.37659
eval/rouge2,0.14162
eval/rougeL,0.36253
eval/runtime,9.1568
eval/samples_per_second,54.495
eval/steps_per_second,1.747
train/epoch,7.0
train/global_step,3635.0
train/learning_rate,2e-05


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
1,0.925,0.605762,0.342505,0.112973,0.329635
2,0.5743,0.555523,0.362305,0.12755,0.347382
3,0.5142,0.538218,0.36958,0.130196,0.353386
4,0.472,0.531331,0.373452,0.136274,0.358458
5,0.4397,0.530218,0.374848,0.140457,0.360029
6,0.415,0.530673,0.37035,0.136226,0.353578
7,0.3944,0.533168,0.377342,0.140464,0.358657
8,0.3787,0.533128,0.382388,0.144933,0.365906


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 09:19:06,201] Trial 1 finished with value: 0.0 and parameters: {'learning_rate': 3.0036192710102503e-05, 'per_device_train_batch_size': 32, 'num_train_epochs': 10, 'warmup_ratio': 0.17006261573259593, 'optim': 'adamw_torch', 'gradient_accumulation_steps': 3, 'lr_scheduler_type': 'linear', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "kobart_

VBox(children=(Label(value='472.661 MB of 472.661 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,█▃▂▁▁▁▁▁▁
eval/rouge1,▁▄▆▆▇▆▇█▇
eval/rouge2,▁▄▅▆▇▆▇█▇
eval/rougeL,▁▄▆▇▇▆▇█▇
eval/runtime,█▇▂█▆▁▃▅▆
eval/samples_per_second,▁▂▇▁▃█▆▄▃
eval/steps_per_second,▁▂▇▁▃█▆▄▃
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇████
train/learning_rate,▄█▇▆▅▃▂▁

0,1
eval/loss,0.53022
eval/rouge1,0.37485
eval/rouge2,0.14046
eval/rougeL,0.36003
eval/runtime,9.1894
eval/samples_per_second,54.302
eval/steps_per_second,1.741
train/epoch,8.0
train/global_step,1040.0
train/learning_rate,1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112480486432711, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
0,0.7762,0.569161,0.364882,0.129418,0.348223
1,0.5376,0.543093,0.375262,0.139214,0.359102
3,0.4482,0.530114,0.374062,0.135641,0.358968
4,0.4122,0.527793,0.381529,0.145793,0.364615
6,0.3552,0.538725,0.381569,0.150174,0.366411
7,0.3293,0.545427,0.383357,0.149002,0.367229


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 09:48:32,829] Trial 2 finished with value: 0.0 and parameters: {'learning_rate': 2.0419700502120603e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 22, 'warmup_ratio': 0.027709566916837014, 'optim': 'adamw_torch', 'gradient_accumulation_steps': 3, 'lr_scheduler_type': 'cosine', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "kobart_

VBox(children=(Label(value='472.661 MB of 472.661 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,█▄▂▁▁▂▃▄▁
eval/rouge1,▁▅▅▄▇▆▇█▇
eval/rouge2,▁▄▃▃▇▅██▇
eval/rougeL,▁▅▅▅▇▅██▇
eval/runtime,▅▆▁▇▇▆▄▃█
eval/samples_per_second,▄▃█▂▂▃▅▆▁
eval/steps_per_second,▄▃█▂▂▃▅▆▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇████
train/learning_rate,██▇▆▅▄▃▁

0,1
eval/loss,0.52779
eval/rouge1,0.38153
eval/rouge2,0.14579
eval/rougeL,0.36462
eval/runtime,9.5215
eval/samples_per_second,52.408
eval/steps_per_second,1.68
train/epoch,8.0
train/global_step,2077.0
train/learning_rate,1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112220543954108, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
1,0.9231,0.604939,0.343386,0.113465,0.329538
2,0.5727,0.5546,0.363795,0.127711,0.348466
3,0.5118,0.536462,0.373021,0.133597,0.355391
4,0.4672,0.52966,0.380584,0.139955,0.364008
5,0.431,0.528519,0.377724,0.143122,0.362635
6,0.4001,0.530378,0.374918,0.139826,0.360043
7,0.3718,0.537404,0.378834,0.144525,0.362886
8,0.3461,0.539946,0.388058,0.149386,0.371383


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 10:17:06,891] Trial 3 finished with value: 0.0 and parameters: {'learning_rate': 3.0653328423101634e-05, 'per_device_train_batch_size': 32, 'num_train_epochs': 26, 'warmup_ratio': 0.06432671094440433, 'optim': 'adamw_torch', 'gradient_accumulation_steps': 3, 'lr_scheduler_type': 'linear', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "kobart_

VBox(children=(Label(value='472.661 MB of 472.661 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,█▃▂▁▁▁▂▂▁
eval/rouge1,▁▄▆▇▆▆▇█▆
eval/rouge2,▁▄▅▆▇▆▇█▇
eval/rougeL,▁▄▅▇▇▆▇█▇
eval/runtime,▅▄▆▄▅▂▁█▆
eval/samples_per_second,▄▅▃▅▄▇█▁▃
eval/steps_per_second,▄▅▃▅▄▇█▁▃
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇████
train/learning_rate,▁█▇▇▆▅▄▄

0,1
eval/loss,0.52852
eval/rouge1,0.37772
eval/rouge2,0.14312
eval/rougeL,0.36264
eval/runtime,9.2539
eval/samples_per_second,53.923
eval/steps_per_second,1.729
train/epoch,8.0
train/global_step,1040.0
train/learning_rate,2e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112678423523903, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
0,0.7387,0.571101,0.357137,0.127379,0.342487
1,0.5344,0.537981,0.375412,0.141973,0.361314
2,0.4587,0.534304,0.383989,0.144337,0.363597
4,0.3156,0.582232,0.380287,0.149235,0.365199
5,0.2564,0.6179,0.372404,0.139136,0.356692


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 10:43:58,063] Trial 4 finished with value: 0.0 and parameters: {'learning_rate': 9.627829275616381e-05, 'per_device_train_batch_size': 4, 'num_train_epochs': 23, 'warmup_ratio': 0.23586166348468168, 'optim': 'adamw_torch', 'gradient_accumulation_steps': 4, 'lr_scheduler_type': 'linear', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "kobart_

VBox(children=(Label(value='472.661 MB of 472.661 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▄▁▁▂▅█▁
eval/rouge1,▁▅██▇▅█
eval/rouge2,▁▆▆██▅▆
eval/rougeL,▁▆▇█▇▅▇
eval/runtime,█▂▁▆▁▄▁
eval/samples_per_second,▁▆█▃█▅█
eval/steps_per_second,▁▆█▃█▅█
train/epoch,▁▁▂▂▄▄▅▅▇▇████
train/global_step,▁▁▂▂▄▄▅▅▇▇████
train/learning_rate,▁▃▄▆██

0,1
eval/loss,0.5343
eval/rouge1,0.38399
eval/rouge2,0.14434
eval/rougeL,0.3636
eval/runtime,8.7197
eval/samples_per_second,57.226
eval/steps_per_second,1.835
train/epoch,6.0
train/global_step,4672.0
train/learning_rate,9e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112269428041246, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
1,0.6737,0.557388,0.365416,0.127676,0.350619
2,0.5078,0.531931,0.369223,0.137229,0.353883
3,0.4395,0.523631,0.384848,0.146511,0.367188
4,0.3843,0.532489,0.381088,0.147686,0.365865
5,0.3386,0.542681,0.384101,0.152389,0.36774
6,0.2985,0.560214,0.381435,0.145882,0.364999


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 11:14:05,836] Trial 5 finished with value: 0.0 and parameters: {'learning_rate': 1.1195580112499379e-05, 'per_device_train_batch_size': 4, 'num_train_epochs': 27, 'warmup_ratio': 0.021867330838572407, 'optim': 'adamw_hf', 'gradient_accumulation_steps': 1, 'lr_scheduler_type': 'linear', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "kobart_

VBox(children=(Label(value='472.661 MB of 472.661 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▇▃▁▃▅█▁
eval/rouge1,▁▂█▇█▇█
eval/rouge2,▁▄▆▇█▆▆
eval/rougeL,▁▂█▇█▇█
eval/runtime,▇▇▂▇▄█▁
eval/samples_per_second,▂▂▆▂▅▁█
eval/steps_per_second,▂▂▆▂▅▁█
train/epoch,▁▁▂▂▄▄▅▅▇▇████
train/global_step,▁▁▂▂▄▄▅▅▇▇████
train/learning_rate,█▇▅▄▂▁

0,1
eval/loss,0.52363
eval/rouge1,0.38485
eval/rouge2,0.14651
eval/rougeL,0.36719
eval/runtime,8.722
eval/samples_per_second,57.211
eval/steps_per_second,1.834
train/epoch,6.0
train/global_step,18690.0
train/learning_rate,1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111256956226296, max=1.0)…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
0,0.8794,0.610731,0.344659,0.114985,0.33019
2,0.5284,0.539351,0.373136,0.132513,0.354535
4,0.4296,0.524761,0.382173,0.146616,0.364589
6,0.3558,0.541721,0.378811,0.146482,0.363097
8,0.3253,0.548606,0.381045,0.14615,0.365383


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 11:47:11,550] Trial 6 finished with value: 0.0 and parameters: {'learning_rate': 2.1283349952003112e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 15, 'warmup_ratio': 0.18171097868373873, 'optim': 'adafactor', 'gradient_accumulation_steps': 4, 'lr_scheduler_type': 'cosine', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "kobart_

VBox(children=(Label(value='472.661 MB of 472.661 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,█▄▂▂▁▂▂▃▁
eval/rouge1,▁▅▆▆▇█▇▇▇
eval/rouge2,▁▅▄▆▇█▇▇▇
eval/rougeL,▁▅▅▆▇█▇▇▇
eval/runtime,█▆▁▇▅▆▃▄▅
eval/samples_per_second,▁▃█▂▄▃▆▅▄
eval/steps_per_second,▁▃█▂▄▃▆▅▄
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇████
train/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇████
train/learning_rate,▁▅██▇▆▅▄

0,1
eval/loss,0.52476
eval/rouge1,0.38217
eval/rouge2,0.14662
eval/rougeL,0.36459
eval/runtime,9.1995
eval/samples_per_second,54.242
eval/steps_per_second,1.739
train/epoch,8.0
train/global_step,3116.0
train/learning_rate,1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112486322720846, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
1,0.7477,0.566374,0.364516,0.129751,0.348194
2,0.5281,0.535987,0.372023,0.13628,0.354985
3,0.4483,0.528247,0.380629,0.147036,0.364518
4,0.3576,0.548002,0.379014,0.147143,0.36289
5,0.2758,0.575984,0.377501,0.148828,0.36312
6,0.2095,0.603378,0.377732,0.148348,0.361222


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 12:09:11,388] Trial 7 finished with value: 0.0 and parameters: {'learning_rate': 7.759960939441187e-05, 'per_device_train_batch_size': 32, 'num_train_epochs': 12, 'warmup_ratio': 0.2184368085760558, 'optim': 'adamw_hf', 'gradient_accumulation_steps': 1, 'lr_scheduler_type': 'cosine', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "kobart_

VBox(children=(Label(value='472.661 MB of 472.661 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▅▂▁▃▅█▁
eval/rouge1,▁▄█▇▇▇█
eval/rouge2,▁▃▇▇██▇
eval/rougeL,▁▄█▇▇▇█
eval/runtime,█▁▂▇▄█▃
eval/samples_per_second,▁█▇▂▅▁▆
eval/steps_per_second,▁█▇▂▅▁▆
train/epoch,▁▁▂▂▄▄▅▅▇▇████
train/global_step,▁▁▂▂▄▄▅▅▇▇████
train/learning_rate,▁▅█▇▆▅

0,1
eval/loss,0.52825
eval/rouge1,0.38063
eval/rouge2,0.14704
eval/rougeL,0.36452
eval/runtime,8.9072
eval/samples_per_second,56.022
eval/steps_per_second,1.796
train/epoch,6.0
train/global_step,2340.0
train/learning_rate,6e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112416701184378, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
1,0.805,0.590361,0.35307,0.120144,0.336998
2,0.5636,0.548346,0.373358,0.134995,0.356968
3,0.5015,0.531786,0.376587,0.139515,0.360598
4,0.4416,0.528579,0.383427,0.147176,0.365587
5,0.3804,0.538028,0.391862,0.154821,0.375494
6,0.3203,0.561426,0.380294,0.144201,0.361093
7,0.2562,0.58818,0.378882,0.144727,0.362968


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 12:36:00,072] Trial 8 finished with value: 0.0 and parameters: {'learning_rate': 4.515705789656122e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 24, 'warmup_ratio': 0.2303326549082892, 'optim': 'adamw_torch', 'gradient_accumulation_steps': 1, 'lr_scheduler_type': 'linear', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "kobart_

VBox(children=(Label(value='472.661 MB of 472.661 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,█▃▁▁▂▅█▁
eval/rouge1,▁▅▅▆█▆▆▆
eval/rouge2,▁▄▅▆█▆▆▆
eval/rougeL,▁▅▅▆█▅▆▆
eval/runtime,▇▁▄▄▅█▄▄
eval/samples_per_second,▂█▅▅▄▁▅▅
eval/steps_per_second,▂█▅▄▄▁▅▄
train/epoch,▁▁▂▂▃▃▅▅▆▆▇▇████
train/global_step,▁▁▂▂▃▃▅▅▆▆▇▇████
train/learning_rate,▁▃▄▆▇██

0,1
eval/loss,0.52858
eval/rouge1,0.38343
eval/rouge2,0.14718
eval/rougeL,0.36559
eval/runtime,9.2693
eval/samples_per_second,53.834
eval/steps_per_second,1.726
train/epoch,7.0
train/global_step,5453.0
train/learning_rate,4e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112175260980925, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
1,0.8102,0.5938,0.353104,0.119105,0.337299
2,0.5664,0.549556,0.370176,0.132408,0.355062
3,0.5048,0.53038,0.371765,0.136098,0.355966
4,0.4435,0.525955,0.382022,0.146392,0.364857
5,0.3906,0.528711,0.384217,0.148396,0.366739
6,0.3469,0.543156,0.381277,0.147669,0.366788
7,0.3091,0.557211,0.377228,0.143517,0.360896


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[I 2024-08-30 13:04:32,241] Trial 9 finished with value: 0.0 and parameters: {'learning_rate': 1.996091241653689e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 19, 'warmup_ratio': 0.13691082569395924, 'optim': 'adamw_torch', 'gradient_accumulation_steps': 2, 'lr_scheduler_type': 'linear', 'fp16': False}. Best is trial 0 with value: 0.0.


Best trial: 0.0
Best parameters: {'learning_rate': 2.1169083140275866e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 26, 'warmup_ratio': 0.09577831393575928, 'optim': 'adamw_hf', 'gradient_accumulation_steps': 3, 'lr_scheduler_type': 'cosine', 'fp16': False}


In [12]:
def objective(trial):
    # Optuna가 제공하는 hyperparameters를 사용하여 config 업데이트
    config_data['training']['learning_rate'] = 2.1169083140275866e-05
    config_data['training']['per_device_train_batch_size'] = 8
    config_data['training']['num_train_epochs'] = 26
    config_data['training']['warmup_ratio'] = 0.09577831393575928
    config_data['training']['optim'] = 'adamw_hf'
    config_data['training']['gradient_accumulation_steps'] = 3
    config_data['training']['lr_scheduler_type'] = 'cosine'
    config_data['training']['fp16'] = False

    # Device 설정
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f"Running on: {device}")

    # 모델 및 tokenizer 불러오기
    generate_model, tokenizer = load_tokenizer_and_model_for_train(config_data, device)
    print("Model and Tokenizer Loaded.")

    # 데이터 로드 및 전처리
    preprocessor = Preprocess(config_data['tokenizer']['bos_token'], config_data['tokenizer']['eos_token'])
    train_inputs_dataset, val_inputs_dataset = prepare_train_dataset(config_data, preprocessor, config_data['general']['data_path'], tokenizer)

    # Trainer 설정
    trainer = load_trainer_for_train(config_data, generate_model, tokenizer, train_inputs_dataset, val_inputs_dataset)

    # Training 시작
    trainer.train()

    # Validation 성능을 기준으로 최적의 하이퍼파라미터 선택
    eval_metrics = trainer.evaluate(eval_dataset=val_inputs_dataset)
    rougeL = eval_metrics.get('rougeL', 0.0)

    return rougeL

In [13]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Running on: {device}")

# 모델 및 tokenizer 불러오기
generate_model, tokenizer = load_tokenizer_and_model_for_train(config_data, device)
print("Model and Tokenizer Loaded.")

# 데이터 로드 및 전처리
preprocessor = Preprocess(config_data['tokenizer']['bos_token'], config_data['tokenizer']['eos_token'])
train_inputs_dataset, val_inputs_dataset = prepare_train_dataset(config_data, preprocessor, config_data['general']['data_path'], tokenizer)

# Trainer 설정
trainer = load_trainer_for_train(config_data, generate_model, tokenizer, train_inputs_dataset, val_inputs_dataset)

# Training 시작
trainer.train()

# Save the best model checkpoint path to the config
best_model_path = trainer.state.best_model_checkpoint
config_data['inference']['ckt_path'] = best_model_path

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------
BartConfig {
  "_name_or_path": "EbanLee/kobart-summary-v3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "EbanLee(rudwo6769@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": fals

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


---------- Make dataset complete ----------
---------- Make training arguments ----------


[34m[1mwandb[0m: Currently logged in as: [33mrmadyd0314[0m ([33mlegendki[0m). Use [1m`wandb login --relogin`[0m to force relogin


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel
1,1.022,0.634309,0.326655,0.101321,0.312527
2,0.6082,0.581463,0.351159,0.117514,0.336022
3,0.5586,0.556011,0.364429,0.128457,0.346924
4,0.5225,0.544667,0.367032,0.132963,0.348829
5,0.4968,0.538125,0.371644,0.135485,0.352777
6,0.4759,0.534033,0.372839,0.135696,0.354633
7,0.4561,0.532377,0.370061,0.132959,0.352563
8,0.4392,0.5306,0.374595,0.138304,0.358497
9,0.4238,0.531613,0.376204,0.143367,0.361171
10,0.4109,0.533131,0.374713,0.140592,0.358827


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [16]:
class DatasetForInference(Dataset):
    def __init__(self, encoder_input, test_id, len):
        self.encoder_input = encoder_input
        self.test_id = test_id
        self.len = len

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encoder_input.items()}
        item['ID'] = self.test_id[idx]
        return item

    def __len__(self):
        return self.len

In [17]:
def prepare_test_dataset(config,preprocessor, tokenizer):

    test_file_path = os.path.join(config['general']['data_path'],'test.csv')

    test_data = preprocessor.make_set_as_df(test_file_path,is_train=False)
    test_id = test_data['fname']

    print('-'*150)
    print(f'test_data:\n{test_data["dialogue"][0]}')
    print('-'*150)

    encoder_input_test , decoder_input_test = preprocessor.make_input(test_data,is_test=True)
    print('-'*10, 'Load data complete', '-'*10,)

    test_tokenized_encoder_inputs = tokenizer(encoder_input_test, return_tensors="pt", padding=True,
                    add_special_tokens=True, truncation=True, max_length=config['tokenizer']['encoder_max_len'], return_token_type_ids=False,)
    test_tokenized_decoder_inputs = tokenizer(decoder_input_test, return_tensors="pt", padding=True,
                    add_special_tokens=True, truncation=True, max_length=config['tokenizer']['decoder_max_len'], return_token_type_ids=False,)

    test_encoder_inputs_dataset = DatasetForInference(test_tokenized_encoder_inputs, test_id, len(encoder_input_test))
    print('-'*10, 'Make dataset complete', '-'*10,)

    return test_data, test_encoder_inputs_dataset

In [18]:
def load_tokenizer_and_model_for_test(config,device):
    print('-'*10, 'Load tokenizer & model', '-'*10,)

    model_name = config['general']['model_name']
    ckt_path = config['inference']['ckt_path']
    print('-'*10, f'Model Name : {model_name}', '-'*10,)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    special_tokens_dict = {'additional_special_tokens': config['tokenizer']['special_tokens']}
    tokenizer.add_special_tokens(special_tokens_dict)

    generate_model = BartForConditionalGeneration.from_pretrained(ckt_path)
    generate_model.resize_token_embeddings(len(tokenizer))
    generate_model.to(device)
    print('-'*10, 'Load tokenizer & model complete', '-'*10,)

    return generate_model , tokenizer

In [19]:
def inference(config):
    device = torch.device('cuda:0' if torch.cuda.is_available()  else 'cpu')
    print('-'*10, f'device : {device}', '-'*10,)
    print(torch.__version__)

    generate_model , tokenizer = load_tokenizer_and_model_for_test(config,device)

    data_path = config['general']['data_path']
    preprocessor = Preprocess(config['tokenizer']['bos_token'], config['tokenizer']['eos_token'])

    test_data, test_encoder_inputs_dataset = prepare_test_dataset(config,preprocessor, tokenizer)
    dataloader = DataLoader(test_encoder_inputs_dataset, batch_size=config['inference']['batch_size'])

    summary = []
    text_ids = []
    with torch.no_grad():
        for item in tqdm(dataloader):
            text_ids.extend(item['ID'])
            generated_ids = generate_model.generate(input_ids=item['input_ids'].to('cuda:0'),
                            no_repeat_ngram_size=config['inference']['no_repeat_ngram_size'],
                            early_stopping=config['inference']['early_stopping'],
                            max_length=config['inference']['generate_max_length'],
                            num_beams=config['inference']['num_beams'],
                        )
            for ids in generated_ids:
                result = tokenizer.decode(ids)
                summary.append(result)

    remove_tokens = config['inference']['remove_tokens']
    preprocessed_summary = summary.copy()
    for token in remove_tokens:
        preprocessed_summary = [sentence.replace(token," ") for sentence in preprocessed_summary]

    output = pd.DataFrame(
        {
            "fname": test_data['fname'],
            "summary" : preprocessed_summary,
        }
    )
    result_path = config['inference']['result_path']
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    output.to_csv(os.path.join(result_path, "output.csv"), index=False)

    return output

In [20]:

def inference(config):
    device = torch.device('cuda:0' if torch.cuda.is_available()  else 'cpu')
    print('-'*10, f'device : {device}', '-'*10,)
    print(torch.__version__)

    generate_model , tokenizer = load_tokenizer_and_model_for_test(config,device)

    data_path = config['general']['data_path']
    preprocessor = Preprocess(config['tokenizer']['bos_token'], config['tokenizer']['eos_token'])

    test_data, test_encoder_inputs_dataset = prepare_test_dataset(config,preprocessor, tokenizer)
    dataloader = DataLoader(test_encoder_inputs_dataset, batch_size=config['inference']['batch_size'])

    summary = []
    text_ids = []
    with torch.no_grad():
        for item in tqdm(dataloader):
            text_ids.extend(item['ID'])
            generated_ids = generate_model.generate(input_ids=item['input_ids'].to('cuda:0'),
                            no_repeat_ngram_size=config['inference']['no_repeat_ngram_size'],
                            early_stopping=config['inference']['early_stopping'],
                            max_length=config['inference']['generate_max_length'],
                            num_beams=config['inference']['num_beams'],
                        )
            for ids in generated_ids:
                result = tokenizer.decode(ids)
                summary.append(result)

    remove_tokens = config['inference']['remove_tokens']
    preprocessed_summary = summary.copy()
    for token in remove_tokens:
        preprocessed_summary = [sentence.replace(token," ") for sentence in preprocessed_summary]

    output = pd.DataFrame(
        {
            "fname": test_data['fname'],
            "summary" : preprocessed_summary,
        }
    )
    result_path = config['inference']['result_path']
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    output.to_csv(os.path.join(result_path, "output.csv"), index=False)

    return output

In [21]:
if __name__ == "__main__":
    output = inference(config_data)
    print(output)

---------- device : cuda:0 ----------
2.1.0
---------- Load tokenizer & model ----------
---------- Model Name : EbanLee/kobart-summary-v3 ----------


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'NEGATIVE', '1': 'POSITIVE'}. The number of labels wil be overwritten to 2.


---------- Load tokenizer & model complete ----------
------------------------------------------------------------------------------------------------------------------------------------------------------
test_data:
#Person1#: 더슨 씨, 받아쓰기 좀 해주세요. 
#Person2#: 네, 실장님...
#Person1#: 이것은 오늘 오후까지 모든 직원에게 내부 메모로 전달되어야 합니다. 준비되셨나요?
#Person2#: 네, 실장님. 시작하셔도 됩니다.
#Person1#: 모든 직원들에게 주의하라... 즉시 효력을 발휘하여, 모든 사무실 통신은 이메일 통신과 공식 메모로 제한됩니다. 근무 시간 동안 직원들이 즉시 메시지 프로그램을 사용하는 것은 엄격히 금지됩니다.
#Person2#: 실장님, 이것은 내부 통신에만 적용되는 건가요? 아니면 외부 통신에도 제한이 되는 건가요?
#Person1#: 이것은 모든 통신에 적용되어야 합니다, 이 사무실 내의 직원들 사이뿐만 아니라 외부 통신에도 마찬가지입니다.
#Person2#: 하지만 실장님, 많은 직원들이 고객과 소통하기 위해 즉시 메시지를 사용하고 있습니다.
#Person1#: 그들은 그들의 의사소통 방법을 바꾸어야만 합니다. 이 사무실에서 누구도 즉시 메시지를 사용하지 않기를 원합니다. 너무 많은 시간을 낭비하게 됩니다! 이제, 메모를 계속해주세요. 우리가 어디까지 했나요?
#Person2#: 이것은 내부와 외부 통신에 적용됩니다.
#Person1#: 그렇습니다. 즉시 메시지를 계속 사용하는 어떤 직원이라도 먼저 경고를 받고 직무 정지에 처해질 것입니다. 두 번째 위반 시에는 직원은 해고에 처해질 것입니다. 이 새로운 정책에 대한 어떤 질문이라도 부서장에게 직접 문의하면 됩니다.
#Person2#: 그게 다신가요?
#Person1#: 네. 

100%|██████████| 16/16 [00:23<00:00,  1.46s/it]

        fname                                            summary
0      test_0     더슨 씨는 #Person1# 에게 모든 직원에게 내부 메모가 전달되어야 한다고...
1      test_1    #Person1# 은 교통 체증에 걸렸다. #Person2# 는 #Person1...
2      test_2     케이트는 마샤와 히어로가 2개월 동안 별거 중이다가 이혼을 신청했다고 #Per...
3      test_3    #Person1# 은 브라이언의 생일을 축하하기 위해 파티를 즐긴다. 브란드는 ...
4      test_4    #Person2# 는 #Person1# 에게 올림픽 스타디움에 있는 올림픽 공원...
..        ...                                                ...
494  test_495     잭이 찰리에게 새 게임에 대해 묻습니다. 찰리는 잭에게 캐릭터를 만드는 게임을...
495  test_496    #Person2# 는 #Person1# 에게 #Person2# 가 컨트리 음악 ...
496  test_497     앨리스는 #Person1# 에게 세탁기, 건조기, 비누를 어떻게 사용하는지 가...
497  test_498     스티브는 매튜에게 그녀의 계약이 다음 달에 끝나기 때문에 최근에 살 곳을 찾고...
498  test_499     프랭크는 벳시에게 승진하고 친구들 모두를 위한 큰 파티를 열 계획이라고 말한다...

[499 rows x 2 columns]





In [23]:
output_path = os.path.join(config_data['inference']['result_path'], "output.csv")
output_df = pd.read_csv(output_path)
print(output_df.head())

    fname                                            summary
0  test_0     더슨 씨는 #Person1# 에게 모든 직원에게 내부 메모가 전달되어야 한다고...
1  test_1    #Person1# 은 교통 체증에 걸렸다. #Person2# 는 #Person1...
2  test_2     케이트는 마샤와 히어로가 2개월 동안 별거 중이다가 이혼을 신청했다고 #Per...
3  test_3    #Person1# 은 브라이언의 생일을 축하하기 위해 파티를 즐긴다. 브란드는 ...
4  test_4    #Person2# 는 #Person1# 에게 올림픽 스타디움에 있는 올림픽 공원...
