In [1]:
import pandas as pd
import os
import re
import json
import yaml
from glob import glob
from tqdm import tqdm
from pprint import pprint
import torch
import pytorch_lightning as pl
from rouge import Rouge

from torch.utils.data import Dataset , DataLoader
from transformers import AutoTokenizer, BartForConditionalGeneration, BartConfig
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import Trainer, TrainingArguments
from transformers import EarlyStoppingCallback

import wandb 

In [2]:
tokenizer = AutoTokenizer.from_pretrained("knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM")

In [3]:
config_data = {
    "general": {
        "data_path": "../data/", 
        "model_name": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
        "output_dir": "./" 
    },
    "tokenizer": {
        "encoder_max_len": 512,
        "decoder_max_len": 100,
        "bos_token": f"{tokenizer.bos_token}",
        "eos_token": f"{tokenizer.eos_token}",
        "special_tokens": ['#Person1#', '#Person2#', '#Person3#', '#PhoneNumber#', '#Address#', '#PassportNumber#']
    },
    "training": {
        "overwrite_output_dir": True,
        "num_train_epochs": 12,
        "learning_rate": 2.7239932412675317e-05,
        "per_device_train_batch_size": 4,
        "per_device_eval_batch_size": 8,
        "warmup_ratio": 0.2534231630584237,
        "weight_decay": 0.01,
        "lr_scheduler_type": 'linear',
        "optim": 'adamw_hf',
        "gradient_accumulation_steps": 1,
        "evaluation_strategy": 'epoch',
        "save_strategy": 'epoch',
        "save_total_limit": 5,
        "fp16": True,
        "load_best_model_at_end": True,
        "seed": 42,
        "logging_dir": "./logs",
        "logging_strategy": "epoch",
        "predict_with_generate": True,
        "generation_max_length": 100,
        "do_train": True,
        "do_eval": True,
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.001,
        "report_to": "wandb"
    },

    "wandb": {
        "entity": "legendki",
        "project": "NLP-Summarization",
        "name": "BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
    },
    "inference": {
        "ckt_path": "model ckt path", 
        "result_path": "./prediction/",
        "no_repeat_ngram_size": 2,
        "early_stopping": True,
        "generate_max_length": 100,
        "num_beams": 4,
        "batch_size" : 32,
        "remove_tokens": ['<usr>', f"{tokenizer.bos_token}", f"{tokenizer.eos_token}", f"{tokenizer.pad_token}"]
    }
}

In [4]:
config_path = "./config.yaml"
with open(config_path, "w") as file:
    yaml.dump(config_data, file, allow_unicode=True)
    
config_path = "./config.yaml"
with open(config_path, "r") as file:
    loaded_config = yaml.safe_load(file)

In [5]:
pprint(loaded_config)
loaded_config['general']
loaded_config['tokenizer']
data_path = loaded_config['general']['data_path']

{'general': {'data_path': '../data/',
             'model_name': 'knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM',
             'output_dir': './'},
 'inference': {'batch_size': 32,
               'ckt_path': 'model ckt path',
               'early_stopping': True,
               'generate_max_length': 100,
               'no_repeat_ngram_size': 2,
               'num_beams': 4,
               'remove_tokens': ['<usr>', '<s>', '</s>', '<pad>'],
               'result_path': './prediction/'},
 'tokenizer': {'bos_token': '<s>',
               'decoder_max_len': 100,
               'encoder_max_len': 512,
               'eos_token': '</s>',
               'special_tokens': ['#Person1#',
                                  '#Person2#',
                                  '#Person3#',
                                  '#PhoneNumber#',
                                  '#Address#',
                                  '#PassportNumber#']},
 'training': {'do_eval': True,
              '

In [6]:
train_df = pd.read_csv(os.path.join(data_path,'train_translated.csv'))
train_df.tail()

Unnamed: 0,fname,dialogue,summary,topic,translated_dialogue,translated_summary
12452,train_12455,#Person1#: 실례합니다. 맨체스터 출신의 그린 씨이신가요?\n#Person2...,탄 링은 흰머리와 수염으로 쉽게 인식되는 그린 씨를 만나 호텔로 데려갈 예정입니다....,누군가를 태우다,#Person1#: Excuse me. Are you Mr. Green from M...,"Tan Ling is going to meet Mr. Green, who is ea..."
12453,train_12456,#Person1#: 이윙 씨가 우리가 컨퍼런스 센터에 오후 4시에 도착해야 한다고 ...,#Person1#과 #Person2#는 이윙 씨가 늦지 않도록 요청했기 때문에 컨퍼...,컨퍼런스 센터,#Person1#: Mr. Ewing said we should be at the ...,#Person1# and #Person2# planned to take the su...
12454,train_12457,#Person1#: 오늘 어떻게 도와드릴까요?\n#Person2#: 차를 빌리고 싶...,#Person2#는 #Person1#의 도움으로 5일 동안 소형 차를 빌립니다.,차 렌트,#Person1#: How can I help you today?\n#Person2...,#Person2# rents a compact car for five days wi...
12455,train_12458,#Person1#: 오늘 좀 행복해 보이지 않아. 무슨 일 있어?\n#Person2...,#Person2#의 엄마가 일자리를 잃었다. #Person2#는 엄마가 우울해하지 ...,실직,#Person1#: You don't look very happy today. Is...,#Person2#'s mom lost her job. #Person2# doesn'...
12456,train_12459,"#Person1#: 엄마, 다음 토요일에 이 삼촌네 가족을 방문하기 위해 비행기를 ...",#Person1#은 다음 토요일에 이 삼촌네를 방문할 때 가방을 어떻게 싸야 할지 ...,짐 싸기,"#Person1#: Mom, we're flying to visit my Uncle...",#Person1# asks #Person2# for advice on how to ...


In [7]:
val_df = pd.read_csv(os.path.join(data_path,'dev_translated.csv'))
val_df.tail()

Unnamed: 0,fname,dialogue,summary,topic,translated_dialogue,translated_summary
494,dev_495,#Person1#: 이제 새해가 되어서 새로운 시작을 하려고 결심했어. \r\n#P...,#Person1#은 새해에 금연을 하고 커밍아웃하기로 결정했습니다. #Person2...,새해,"#Person1#: Now that it's a new year, I've deci...",#Person1# has decided to quit smoking and come...
495,dev_496,"#Person1#: 너, 조랑 결혼했지? \r\n#Person2#: 조? 무슨 말인...",#Person1#은 #Person2#가 조와 결혼했다고 생각했다. #Person2#...,사랑에 빠지다,"#Person1#: You, you married Joe, right? \r\n#P...",#Person1# thought #Person2# was married to Joe...
496,dev_497,"#Person1#: 무엇을 도와드릴까요, 부인?\r\n#Person2#: 몇 주 동...",#Person2#의 차에서 이상한 소리가 납니다. #Person1#는 브레이크를 교...,소음,"#Person1#: What can I do for you, ma'am?\r\n#P...",There is a strange noise coming from #Person2#...
497,dev_498,"#Person1#: 안녕하세요, 아마존 고객 서비스입니다. 무엇을 도와드릴까요?\n...",#Person2#님이 아마존 고객 서비스에 전화하여 아마존에서 받은 책에 한 페이지...,빠진 페이지,"#Person1#: Hello, this is Amazon customer serv...",#Person2# calls Amazon customer service to say...
498,dev_499,#Person1#: 여름이 다 되어간다는 게 믿기지 않아.\r\n#Person2#:...,#Person2#는 #Person1#에게 여름 휴가 동안 파티를 도와주는 회사에서 ...,여름 휴가,#Person1#: I can't believe it's almost the end...,#Person2# tells #Person1# that he will be work...


In [8]:
class Preprocess:
    def __init__(self,
            bos_token: str,
            eos_token: str,
        ) -> None:

        self.bos_token = bos_token
        self.eos_token = eos_token

    @staticmethod
    def make_set_as_df(file_path, is_train = True):
        if is_train:
            df = pd.read_csv(file_path)
            train_df = df[['fname','translated_dialogue','translated_summary']]
            return train_df
        else:
            df = pd.read_csv(file_path)
            test_df = df[['fname','translated_dialogue']]
            return test_df

    def make_input(self, dataset,is_test = False):
        if is_test:
            encoder_input = dataset['translated_dialogue']
            decoder_input = [self.bos_token] * len(dataset['translated_dialogue'])
            return encoder_input.tolist(), list(decoder_input)
        else:
            encoder_input = dataset['translated_dialogue']
            decoder_input = dataset['translated_summary'].apply(lambda x : self.bos_token + str(x))
            decoder_output = dataset['translated_summary'].apply(lambda x : str(x) + self.eos_token)
            return encoder_input.tolist(), decoder_input.tolist(), decoder_output.tolist()

In [9]:
class DatasetForTrain(Dataset):
    def __init__(self, encoder_input, decoder_input, labels, len):
        self.encoder_input = encoder_input
        self.decoder_input = decoder_input
        self.labels = labels
        self.len = len

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encoder_input.items()}
        item2 = {key: val[idx].clone().detach() for key, val in self.decoder_input.items()} 
        item2['decoder_input_ids'] = item2['input_ids']
        item2['decoder_attention_mask'] = item2['attention_mask']
        item2.pop('input_ids')
        item2.pop('attention_mask')
        item.update(item2)
        item['labels'] = self.labels['input_ids'][idx]
        return item

    def __len__(self):
        return self.len
    

class DatasetForVal(Dataset):
    def __init__(self, encoder_input, decoder_input, labels, len):
        self.encoder_input = encoder_input
        self.decoder_input = decoder_input
        self.labels = labels
        self.len = len

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encoder_input.items()} 
        item2 = {key: val[idx].clone().detach() for key, val in self.decoder_input.items()} 
        item2['decoder_input_ids'] = item2['input_ids']
        item2['decoder_attention_mask'] = item2['attention_mask']
        item2.pop('input_ids')
        item2.pop('attention_mask')
        item.update(item2) 
        item['labels'] = self.labels['input_ids'][idx]
        return item

    def __len__(self):
        return self.len

class DatasetForInference(Dataset):
    def __init__(self, encoder_input, test_id, len):
        self.encoder_input = encoder_input
        self.test_id = test_id
        self.len = len

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encoder_input.items()}
        item['ID'] = self.test_id[idx]
        return item

    def __len__(self):
        return self.len

In [10]:
def prepare_train_dataset(config, preprocessor, data_path, tokenizer):
    train_file_path = os.path.join(data_path,'train_translated.csv')
    val_file_path = os.path.join(data_path,'dev_translated.csv')

    train_data = preprocessor.make_set_as_df(train_file_path)
    val_data = preprocessor.make_set_as_df(val_file_path)

    print('-'*150)
    print(f'train_data:\n {train_data["translated_dialogue"][0]}')
    print(f'train_label:\n {train_data["translated_summary"][0]}')

    print('-'*150)
    print(f'val_data:\n {val_data["translated_dialogue"][0]}')
    print(f'val_label:\n {val_data["translated_summary"][0]}')

    encoder_input_train , decoder_input_train, decoder_output_train = preprocessor.make_input(train_data)
    encoder_input_val , decoder_input_val, decoder_output_val = preprocessor.make_input(val_data)
    print('-'*10, 'Load data complete', '-'*10,)

    tokenized_encoder_inputs = tokenizer(encoder_input_train, return_tensors="pt", padding=True,
                            add_special_tokens=True, truncation=True, max_length=config['tokenizer']['encoder_max_len'], return_token_type_ids=False)
    tokenized_decoder_inputs = tokenizer(decoder_input_train, return_tensors="pt", padding=True,
                        add_special_tokens=True, truncation=True, max_length=config['tokenizer']['decoder_max_len'], return_token_type_ids=False)
    tokenized_decoder_ouputs = tokenizer(decoder_output_train, return_tensors="pt", padding=True,
                        add_special_tokens=True, truncation=True, max_length=config['tokenizer']['decoder_max_len'], return_token_type_ids=False)

    train_inputs_dataset = DatasetForTrain(tokenized_encoder_inputs, tokenized_decoder_inputs, tokenized_decoder_ouputs,len(encoder_input_train))

    val_tokenized_encoder_inputs = tokenizer(encoder_input_val, return_tensors="pt", padding=True,
                        add_special_tokens=True, truncation=True, max_length=config['tokenizer']['encoder_max_len'], return_token_type_ids=False)
    val_tokenized_decoder_inputs = tokenizer(decoder_input_val, return_tensors="pt", padding=True,
                        add_special_tokens=True, truncation=True, max_length=config['tokenizer']['decoder_max_len'], return_token_type_ids=False)
    val_tokenized_decoder_ouputs = tokenizer(decoder_output_val, return_tensors="pt", padding=True,
                        add_special_tokens=True, truncation=True, max_length=config['tokenizer']['decoder_max_len'], return_token_type_ids=False)

    val_inputs_dataset = DatasetForVal(val_tokenized_encoder_inputs, val_tokenized_decoder_inputs, val_tokenized_decoder_ouputs,len(encoder_input_val))

    print('-'*10, 'Make dataset complete', '-'*10,)
    return train_inputs_dataset, val_inputs_dataset

In [11]:
def compute_metrics(config,tokenizer,pred):
    rouge = Rouge()
    predictions = pred.predictions
    labels = pred.label_ids

    predictions[predictions == -100] = tokenizer.pad_token_id
    labels[labels == -100] = tokenizer.pad_token_id

    decoded_preds = tokenizer.batch_decode(predictions, clean_up_tokenization_spaces=True)
    labels = tokenizer.batch_decode(labels, clean_up_tokenization_spaces=True)

    replaced_predictions = decoded_preds.copy()
    replaced_labels = labels.copy()
    remove_tokens = config['inference']['remove_tokens']
    for token in remove_tokens:
        replaced_predictions = [sentence.replace(token," ") for sentence in replaced_predictions]
        replaced_labels = [sentence.replace(token," ") for sentence in replaced_labels]

    print('-'*150)
    print(f"PRED: {replaced_predictions[0]}")
    print(f"GOLD: {replaced_labels[0]}")
    print('-'*150)
    print(f"PRED: {replaced_predictions[1]}")
    print(f"GOLD: {replaced_labels[1]}")
    print('-'*150)
    print(f"PRED: {replaced_predictions[2]}")
    print(f"GOLD: {replaced_labels[2]}")

    results = rouge.get_scores(replaced_predictions, replaced_labels,avg=True)

    result = {key: value["f"] for key, value in results.items()}
    return result

In [12]:
def load_trainer_for_train(config,generate_model,tokenizer,train_inputs_dataset,val_inputs_dataset):
    print('-'*10, 'Make training arguments', '-'*10,)
    training_args = Seq2SeqTrainingArguments(
                output_dir=config['general']['output_dir'], 
                overwrite_output_dir=config['training']['overwrite_output_dir'],
                num_train_epochs=config['training']['num_train_epochs'],  
                learning_rate=config['training']['learning_rate'], 
                per_device_train_batch_size=config['training']['per_device_train_batch_size'], 
                per_device_eval_batch_size=config['training']['per_device_eval_batch_size'],
                warmup_ratio=config['training']['warmup_ratio'], 
                weight_decay=config['training']['weight_decay'],  
                lr_scheduler_type=config['training']['lr_scheduler_type'],
                optim =config['training']['optim'],
                gradient_accumulation_steps=config['training']['gradient_accumulation_steps'],
                evaluation_strategy=config['training']['evaluation_strategy'],
                save_strategy =config['training']['save_strategy'],
                save_total_limit=config['training']['save_total_limit'],
                fp16=config['training']['fp16'],
                load_best_model_at_end=config['training']['load_best_model_at_end'],
                seed=config['training']['seed'],
                logging_dir=config['training']['logging_dir'],
                logging_strategy=config['training']['logging_strategy'],
                predict_with_generate=config['training']['predict_with_generate'], 
                generation_max_length=config['training']['generation_max_length'],
                do_train=config['training']['do_train'],
                do_eval=config['training']['do_eval'],
                report_to=config['training']['report_to'] 
            )

    wandb.init(
        entity=config['wandb']['entity'],
        project=config['wandb']['project'],
        name=config['wandb']['name'],
    )

    os.environ["WANDB_LOG_MODEL"]="end"
    os.environ["WANDB_WATCH"]="false"

    MyCallback = EarlyStoppingCallback(
        early_stopping_patience=config['training']['early_stopping_patience'],
        early_stopping_threshold=config['training']['early_stopping_threshold']
    )
    print('-'*10, 'Make training arguments complete', '-'*10,)
    print('-'*10, 'Make trainer', '-'*10,)

    trainer = Seq2SeqTrainer(
        model=generate_model, 
        args=training_args,
        train_dataset=train_inputs_dataset,
        eval_dataset=val_inputs_dataset,
        compute_metrics = lambda pred: compute_metrics(config,tokenizer, pred),
        callbacks = [MyCallback]
    )
    print('-'*10, 'Make trainer complete', '-'*10,)

    return trainer

In [13]:
def load_tokenizer_and_model_for_train(config,device):
    print('-'*10, 'Load tokenizer & model', '-'*10,)
    print('-'*10, f'Model Name : {config["general"]["model_name"]}', '-'*10,)
    model_name = config['general']['model_name']
    bart_config = BartConfig().from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    generate_model = BartForConditionalGeneration.from_pretrained(config['general']['model_name'],config=bart_config)

    special_tokens_dict={'additional_special_tokens':config['tokenizer']['special_tokens']}
    tokenizer.add_special_tokens(special_tokens_dict)

    generate_model.resize_token_embeddings(len(tokenizer)) 
    generate_model.to(device)
    print(generate_model.config)

    print('-'*10, 'Load tokenizer & model complete', '-'*10,)
    return generate_model , tokenizer

In [14]:
def main(config):
    device = torch.device('cuda:0' if torch.cuda.is_available()  else 'cpu')
    print('-'*10, f'device : {device}', '-'*10,)
    print(torch.__version__)

    generate_model , tokenizer = load_tokenizer_and_model_for_train(config,device)
    print('-'*10,"tokenizer special tokens : ",tokenizer.special_tokens_map,'-'*10)

    preprocessor = Preprocess(config['tokenizer']['bos_token'], config['tokenizer']['eos_token']) # decoder_start_token: str, eos_token: str
    data_path = config['general']['data_path']
    train_inputs_dataset, val_inputs_dataset = prepare_train_dataset(config,preprocessor, data_path, tokenizer)

    trainer = load_trainer_for_train(config, generate_model,tokenizer,train_inputs_dataset,val_inputs_dataset)
    trainer.train()  

    wandb.finish()

In [15]:
if __name__ == "__main__":
    main(loaded_config)

---------- device : cuda:0 ----------
2.1.0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_ch

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


---------- Make dataset complete ----------
---------- Make training arguments ----------


[34m[1mwandb[0m: Currently logged in as: [33mrmadyd0314[0m ([33mlegendki[0m). Use [1m`wandb login --relogin`[0m to force relogin


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------




Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
1,1.345,0.492696,0.496252,0.212064,0.465547
2,0.3607,0.518918,0.482701,0.19797,0.446882
3,0.3051,0.554917,0.475083,0.190306,0.439825
4,0.2437,0.55026,0.475856,0.182316,0.440986


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:   #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                          
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# and Jimmy are going to meet at the gym at 3:30 p.m. to work out. #Person1# wants to work on arms and abs, but Jimmy is following a weekly schedule.                                                        
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to wor

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▁▄█▇
eval/rouge-1,█▄▁▁
eval/rouge-2,█▅▃▁
eval/rouge-l,█▃▁▁
eval/runtime,▅▅█▁
eval/samples_per_second,▄▄▁█
eval/steps_per_second,▄▄▁█
train/epoch,▁▁▃▃▆▆███
train/global_step,▁▁▃▃▆▆███
train/learning_rate,▁▄█▇

0,1
eval/loss,0.55026
eval/rouge-1,0.47586
eval/rouge-2,0.18232
eval/rouge-l,0.44099
eval/runtime,78.268
eval/samples_per_second,6.376
eval/steps_per_second,0.805
train/epoch,4.0
train/global_step,12460.0
train/learning_rate,2e-05


In [16]:
loaded_config['inference']['ckt_path'] = "./checkpoint-9345"

In [17]:
def prepare_test_dataset(config,preprocessor, tokenizer):

    test_file_path = os.path.join(config['general']['data_path'],'test_translated.csv')

    test_data = preprocessor.make_set_as_df(test_file_path,is_train=False)
    test_id = test_data['fname']

    print('-'*150)
    print(f'test_data:\n{test_data["translated_dialogue"][0]}')
    print('-'*150)

    encoder_input_test , decoder_input_test = preprocessor.make_input(test_data,is_test=True)
    print('-'*10, 'Load data complete', '-'*10,)

    test_tokenized_encoder_inputs = tokenizer(encoder_input_test, return_tensors="pt", padding=True,
                    add_special_tokens=True, truncation=True, max_length=config['tokenizer']['encoder_max_len'], return_token_type_ids=False,)
    test_tokenized_decoder_inputs = tokenizer(decoder_input_test, return_tensors="pt", padding=True,
                    add_special_tokens=True, truncation=True, max_length=config['tokenizer']['decoder_max_len'], return_token_type_ids=False,)

    test_encoder_inputs_dataset = DatasetForInference(test_tokenized_encoder_inputs, test_id, len(encoder_input_test))
    print('-'*10, 'Make dataset complete', '-'*10,)

    return test_data, test_encoder_inputs_dataset

In [18]:
def load_tokenizer_and_model_for_test(config,device):
    print('-'*10, 'Load tokenizer & model', '-'*10,)

    model_name = config['general']['model_name']
    ckt_path = config['inference']['ckt_path']
    print('-'*10, f'Model Name : {model_name}', '-'*10,)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    special_tokens_dict = {'additional_special_tokens': config['tokenizer']['special_tokens']}
    tokenizer.add_special_tokens(special_tokens_dict)

    generate_model = BartForConditionalGeneration.from_pretrained(ckt_path)
    generate_model.resize_token_embeddings(len(tokenizer))
    generate_model.to(device)
    print('-'*10, 'Load tokenizer & model complete', '-'*10,)

    return generate_model , tokenizer

In [20]:
def inference(config):
    device = torch.device('cuda:0' if torch.cuda.is_available()  else 'cpu')
    print('-'*10, f'device : {device}', '-'*10,)
    print(torch.__version__)

    generate_model , tokenizer = load_tokenizer_and_model_for_test(config,device)

    data_path = config['general']['data_path']
    preprocessor = Preprocess(config['tokenizer']['bos_token'], config['tokenizer']['eos_token'])

    test_data, test_encoder_inputs_dataset = prepare_test_dataset(config,preprocessor, tokenizer)
    dataloader = DataLoader(test_encoder_inputs_dataset, batch_size=config['inference']['batch_size'])

    summary = []
    text_ids = []
    with torch.no_grad():
        for item in tqdm(dataloader):
            text_ids.extend(item['ID'])
            generated_ids = generate_model.generate(input_ids=item['input_ids'].to('cuda:0'),
                            no_repeat_ngram_size=config['inference']['no_repeat_ngram_size'],
                            early_stopping=config['inference']['early_stopping'],
                            max_length=config['inference']['generate_max_length'],
                            num_beams=config['inference']['num_beams'],
                        )
            for ids in generated_ids:
                result = tokenizer.decode(ids)
                summary.append(result)

    remove_tokens = config['inference']['remove_tokens']
    preprocessed_summary = summary.copy()
    for token in remove_tokens:
        preprocessed_summary = [sentence.replace(token," ") for sentence in preprocessed_summary]

    output = pd.DataFrame(
        {
            "fname": test_data['fname'],
            "summary" : preprocessed_summary,
        }
    )
    result_path = config['inference']['result_path']
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    output.to_csv(os.path.join(result_path, "tuned_bart_xsum.csv"), index=False)

    return output

In [21]:
if __name__ == "__main__":
    output = inference(loaded_config)

---------- device : cuda:0 ----------
2.1.0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
---------- Load tokenizer & model complete ----------
------------------------------------------------------------------------------------------------------------------------------------------------------
test_data:
#Person1#: Mr. Derson, please take down dictation.
#Person2#: Yes, Mr. President...
#Person1#: This is to be an internal memo to all employees by this afternoon. Are you ready?
#Person2#: Yes, Mr. President. You may begin.
#Person1#: Attention all employees... effective immediately, all office communications are to be limited to email correspondence and official memos. The use of instant message programs by employees during working hours is strictly prohibited.
#Person2#: Mr. President, does this apply only to internal communications? Or is there a restriction on external communications as wel

100%|██████████| 16/16 [00:42<00:00,  2.67s/it]


In [22]:
from openai import OpenAI
import pandas as pd

In [23]:
client = OpenAI(
    api_key="API_KEY", 
    base_url="https://api.upstage.ai/v1/solar"
)

def translate_text(text):
    try:
        response = client.chat.completions.create(
            model="solar-1-mini-translate-enko",
            messages=[
                {
                    "role": "user",
                    "content": "#Person2#'s mom lost her job. #Person2# doesn't want her mom to be depressed. #Person1# suggests #Person2# look for job leads on the internet."
                },
                {
                    "role": "assistant",
                    "content": "#Person2#의 엄마가 일자리를 잃었다. #Person2#는 엄마가 우울해하지 않기를 바란다. #Person1#은 #Person2#에게 인터넷에서 일자리 정보를 찾아보는 것을 제안한다."
                },
                {
                    "role": "user",
                    "content": "#Person1# asks #Person2# for advice on how to pack a bag for a visit to this uncle's house next Saturday."
                },
                {
                    "role": "assistant",
                    "content": "#Person1#은 다음 토요일에 이 삼촌네를 방문할 때 가방을 어떻게 싸야 할지 #Person2#에게 조언을 구합니다."
                },
                {
                    "role": "user",
                    "content": "데이브는 #Person2#가 그가 웹트래커에서 일하고 있다는 것을 추론할 수 있다는 사실에 놀란다."
                },
                {
                    "role": "assistant",
                    "content": "Dave is surprised that #Person2# can deduce that he is working for WebTracker."
                },
                {
                    "role": "user",
                    "content": "Bill tells #Person1# that he found out his roommate is Brain Locker."
                },
                {
                    "role": "assistant",
                    "content": "빌은 #Person1#에게 자신의 룸메이트가 브래인 로커라는 것을 알게 되었다고 말한다."
                },
                {
                    "role": "user",
                    "content": text
                }
            ],
            stream=False  
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error translating text: {e}")
        return None

df = pd.read_csv("./prediction/tuned_bart_xsum.csv") 

df["translated_summary"] = df["summary"].apply(translate_text)

df.to_csv("kor_tuned_bart_xsum.csv", index=False)
print("saved to output_.csv.")

saved to output_.csv.


In [25]:
translated_df = pd.read_csv("kor_tuned_bart_xsum.csv")
translated_df = translated_df.drop("summary", axis=1)
translated_df.rename(columns={"translated_summary": "summary"}, inplace=True)
translated_df.head()

Unnamed: 0,fname,summary
0,test_0,데슨 씨는 #Person1#에게 근무 시간 동안 직원들의 인스턴트 메시징 사용이...
1,test_1,#Person2#는 교통 체증에 갇혀 있다. #Person1#은 대중교통을 이용하...
2,test_2,"#Person1#은 케이트에게 마샤와 히어로가 이혼했다고 말하지만, 케이트는 ..."
3,test_3,#Person1#은 브라이언의 생일 파티에 참석한다. 브라이언은 #Person1...
4,test_4,#Person1#과 #Person2#는 올림픽 공원을 구경하고 있다. #Pers...


In [26]:
translated_df.to_csv("kor_tuned_bart_xsum.csv", index=False)

In [15]:
import optuna

def objective(trial):
    config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)
    config_data['training']['per_device_train_batch_size'] = trial.suggest_categorical('per_device_train_batch_size', [4, 8, 16])
    config_data['training']['num_train_epochs'] = trial.suggest_int('num_train_epochs', 10, 30)
    config_data['training']['warmup_ratio'] = trial.suggest_float('warmup_ratio', 0.0, 0.3)
    config_data['training']['optim'] = trial.suggest_categorical('optim', ['adamw_torch', 'adamw_hf', 'adafactor'])
    config_data['training']['gradient_accumulation_steps'] = trial.suggest_int('gradient_accumulation_steps', 1, 4)
    config_data['training']['lr_scheduler_type'] = trial.suggest_categorical('lr_scheduler_type', ['linear', 'cosine'])
    config_data['training']['fp16'] = trial.suggest_categorical('fp16', [True, False])

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f"Running on: {device}")

    generate_model, tokenizer = load_tokenizer_and_model_for_train(config_data, device)
    print("Model and Tokenizer Loaded.")

    preprocessor = Preprocess(config_data['tokenizer']['bos_token'], config_data['tokenizer']['eos_token'])
    train_inputs_dataset, val_inputs_dataset = prepare_train_dataset(config_data, preprocessor, config_data['general']['data_path'], tokenizer)

    trainer = load_trainer_for_train(config_data, generate_model, tokenizer, train_inputs_dataset, val_inputs_dataset)

    trainer.train()

    eval_metrics = trainer.evaluate(eval_dataset=val_inputs_dataset)
    rougeL = eval_metrics.get('rougeL', 0.0)

    return rougeL
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

print(f"Best trial: {study.best_trial.value}")
print(f"Best parameters: {study.best_trial.params}")


[I 2024-09-06 04:53:56,600] A new study created in memory with name: no-name-08c06ed1-f57d-4f1c-8734-27d3d8021fff
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


---------- Make dataset complete ----------
---------- Make training arguments ----------


[34m[1mwandb[0m: Currently logged in as: [33mrmadyd0314[0m ([33mlegendki[0m). Use [1m`wandb login --relogin`[0m to force relogin


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------




Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
1,1.3512,0.487923,0.492711,0.206867,0.462027
2,0.3604,0.530458,0.484143,0.195717,0.446923
3,0.3056,0.5645,0.472936,0.188335,0.437402
4,0.2438,0.554378,0.480704,0.191542,0.44583


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                        
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:    #Person1# and Jimmy are going to meet at the gym at 3:30 p.m. to work out. #Person1# wants to work on arms and abs, but Jimmy is following a weekly schedule. So they will do legs on Friday.                                                 
GOLD:  #Person1# suggests to Jimmy that they go to the gym 

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                        
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:    #Person1# and Jimmy are going to meet at the gym at 3:30 p.m. to work out. #Person1# wants to work on arms and abs, but Jimmy is following a weekly schedule. So they will do legs on Friday.                                                 
GOLD:  #Person1# suggests to Jimmy that they go to the gym 

[I 2024-09-06 05:35:33,768] Trial 0 finished with value: 0.0 and parameters: {'learning_rate': 2.7239932412675317e-05, 'per_device_train_batch_size': 4, 'num_train_epochs': 12, 'warmup_ratio': 0.2534231630584237, 'optim': 'adamw_hf', 'gradient_accumulation_steps': 1, 'lr_scheduler_type': 'linear', 'fp16': True}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▁▅█▇▁
eval/rouge-1,█▅▁▄█
eval/rouge-2,█▄▁▂█
eval/rouge-l,█▄▁▃█
eval/runtime,▃▂█▁▃
eval/samples_per_second,▆▇▁█▆
eval/steps_per_second,▆▇▁█▆
train/epoch,▁▁▃▃▆▆████
train/global_step,▁▁▃▃▆▆████
train/learning_rate,▁▅█▇

0,1
eval/loss,0.48792
eval/rouge-1,0.49271
eval/rouge-2,0.20687
eval/rouge-l,0.46203
eval/runtime,83.1418
eval/samples_per_second,6.002
eval/steps_per_second,0.758
train/epoch,4.0
train/global_step,12460.0
train/learning_rate,2e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112373694777489, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
0,1.0439,0.541417,0.486044,0.197935,0.45532
2,0.3331,0.581474,0.465344,0.177732,0.43045
4,0.2528,0.627762,0.470438,0.177841,0.435897


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person2# doesn't have any known allergies. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                 
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# and Jimmy are going to do a workout. #Person1# wants to do arms and abs, but Jimmy is following a weekly schedule. They decide to do legs.                                                 
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him t

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person2# doesn't have any known allergies. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                 
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# and Jimmy are going to do a workout. #Person1# wants to do arms and abs, but Jimmy is following a weekly schedule. They decide to do legs.                                                 
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him t

[I 2024-09-06 06:12:16,387] Trial 1 finished with value: 0.0 and parameters: {'learning_rate': 8.370249530886087e-05, 'per_device_train_batch_size': 4, 'num_train_epochs': 16, 'warmup_ratio': 0.11583920996653135, 'optim': 'adamw_torch', 'gradient_accumulation_steps': 2, 'lr_scheduler_type': 'cosine', 'fp16': True}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▁▃▄█▁
eval/rouge-1,█▃▁▃█
eval/rouge-2,█▂▁▁█
eval/rouge-l,█▂▁▃█
eval/runtime,▂█▆▁▂
eval/samples_per_second,▇▁▂█▇
eval/steps_per_second,▇▁▂█▇
train/epoch,▁▁▃▃▆▆████
train/global_step,▁▁▃▃▆▆████
train/learning_rate,▁██▇

0,1
eval/loss,0.54142
eval/rouge-1,0.48604
eval/rouge-2,0.19793
eval/rouge-l,0.45532
eval/runtime,78.8582
eval/samples_per_second,6.328
eval/steps_per_second,0.799
train/epoch,4.0
train/global_step,6230.0
train/learning_rate,8e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112545885973506, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
0,1.8396,0.470637,0.489301,0.205615,0.458128
2,0.3035,0.518171,0.499725,0.209777,0.465211
4,0.2589,0.513081,0.496541,0.210201,0.460283


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:   #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                          
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# suggests to Jimmy that they go to the gym to work out later. They will meet at the gym at 3:30 p.m. on Friday.                                                                
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his arms and stomach.  

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:   #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                          
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# suggests to Jimmy that they go to the gym to work out later. They will meet at the gym at 3:30 p.m. on Friday.                                                                
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his arms and stomach.  

[I 2024-09-06 06:42:02,599] Trial 2 finished with value: 0.0 and parameters: {'learning_rate': 1.7446279371777423e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 12, 'warmup_ratio': 0.0503207359359519, 'optim': 'adafactor', 'gradient_accumulation_steps': 2, 'lr_scheduler_type': 'cosine', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▁▁█▇▁
eval/rouge-1,▁▄█▆▁
eval/rouge-2,▁▅▇█▁
eval/rouge-l,▁▂█▃▁
eval/runtime,▅█▁▆▅
eval/samples_per_second,▄▁█▃▄
eval/steps_per_second,▄▁█▃▄
train/epoch,▁▁▃▃▆▆████
train/global_step,▁▁▃▃▆▆████
train/learning_rate,█▇▄▁

0,1
eval/loss,0.47064
eval/rouge-1,0.4893
eval/rouge-2,0.20561
eval/rouge-l,0.45813
eval/runtime,84.1187
eval/samples_per_second,5.932
eval/steps_per_second,0.749
train/epoch,4.0
train/global_step,1558.0
train/learning_rate,1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112276713053386, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------




Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
0,1.9896,0.484201,0.485074,0.195482,0.453211
1,0.3677,0.501356,0.490396,0.203188,0.457538
3,0.2489,0.55664,0.484865,0.190816,0.445509


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:   #Person2# is having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                           
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# and Jimmy are going to meet at the gym at 3:30 p.m. to work out. #Person1# wants to work on arms and abs, but Jimmy is following a weekly schedule, so they will do legs on Friday.                                                
GOLD:  #Person1# suggests to Jimmy that they go to the gym and c

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:   #Person2# is having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                           
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# and Jimmy are going to meet at the gym at 3:30 p.m. to work out. #Person1# wants to work on arms and abs, but Jimmy is following a weekly schedule, so they will do legs on Friday.                                                
GOLD:  #Person1# suggests to Jimmy that they go to the gym and c

[I 2024-09-06 07:17:57,943] Trial 3 finished with value: 0.0 and parameters: {'learning_rate': 2.5233792027525678e-05, 'per_device_train_batch_size': 4, 'num_train_epochs': 19, 'warmup_ratio': 0.15166645816117616, 'optim': 'adamw_hf', 'gradient_accumulation_steps': 3, 'lr_scheduler_type': 'linear', 'fp16': False}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▁▃▅█▁
eval/rouge-1,▁██▁▁
eval/rouge-2,▄█▆▁▄
eval/rouge-l,▅█▆▁▅
eval/runtime,█▆▁██
eval/samples_per_second,▁▃█▁▁
eval/steps_per_second,▁▃█▁▁
train/epoch,▁▁▃▃▆▆████
train/global_step,▁▁▃▃▆▆████
train/learning_rate,▁▅█▇

0,1
eval/loss,0.4842
eval/rouge-1,0.48507
eval/rouge-2,0.19548
eval/rouge-l,0.45321
eval/runtime,85.2454
eval/samples_per_second,5.854
eval/steps_per_second,0.739
train/epoch,4.0
train/global_step,4153.0
train/learning_rate,2e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112357593244977, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
1,2.1005,0.547666,0.489679,0.200944,0.454819
2,0.374,0.540174,0.481555,0.194338,0.444506
3,0.324,0.595653,0.474285,0.184776,0.436766
4,0.2731,0.592952,0.488478,0.197618,0.453346
5,0.2167,0.606459,0.475266,0.182846,0.434732


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# is having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                         
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# suggests to Jimmy that they go to the gym to work out later. They will meet at the gym at 3:30 p.m. and work on arms and abs.                                                            
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his arms and stom

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person1# is going to send #Person2# to the pulmonologist to get checked for asthma.                                                                     
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:    #Person1# and Jimmy are going to meet at the gym at 3:30 to work out. #Person1# wants to work on arms and abs, but Jimmy is following a weekly schedule, so they will work on legs on Friday.                                                    
GOLD:  #Person1# suggests to Jimmy that they

[I 2024-09-06 08:00:06,393] Trial 4 finished with value: 0.0 and parameters: {'learning_rate': 3.6019589552277936e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 14, 'warmup_ratio': 0.26517440475777193, 'optim': 'adafactor', 'gradient_accumulation_steps': 2, 'lr_scheduler_type': 'cosine', 'fp16': True}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▂▁▇▇█▁
eval/rouge-1,█▄▁▇▁▄
eval/rouge-2,█▅▂▇▁▅
eval/rouge-l,█▄▂▇▁▄
eval/runtime,▇██▂▁█
eval/samples_per_second,▂▁▁▇█▁
eval/steps_per_second,▂▁▁▇█▁
train/epoch,▁▁▃▃▅▅▆▆████
train/global_step,▁▁▃▃▅▅▆▆████
train/learning_rate,▁▄▆██

0,1
eval/loss,0.54017
eval/rouge-1,0.48155
eval/rouge-2,0.19434
eval/rouge-l,0.44451
eval/runtime,88.5101
eval/samples_per_second,5.638
eval/steps_per_second,0.712
train/epoch,5.0
train/global_step,3895.0
train/learning_rate,3e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112295380896992, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------




Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
0,3.7007,0.555305,0.475483,0.184123,0.44344
2,0.3451,0.50221,0.498427,0.21133,0.465208
4,0.2541,0.526462,0.493851,0.204097,0.458359


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# is having trouble breathing lately. The doctor will send #Person2# to a pulmonologist to check for asthma.                                                                         
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# and Jimmy will meet at the gym at 3:30 to work out. They will do legs and arms and abs on Friday.                                                                     
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his arms and stomach.              

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:   #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                          
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# suggests to Jimmy that they work out later because #Person1#'s legs are sore. They will meet at the gym at 3:30 and work on arms and abs.                                                              
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work ou

[I 2024-09-06 08:35:37,750] Trial 5 finished with value: 0.0 and parameters: {'learning_rate': 2.146545268854367e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 20, 'warmup_ratio': 0.20688910271024513, 'optim': 'adamw_hf', 'gradient_accumulation_steps': 2, 'lr_scheduler_type': 'linear', 'fp16': True}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,█▁▂▅▅▁
eval/rouge-1,▁▆█▆▇▆
eval/rouge-2,▁▇█▆▆▇
eval/rouge-l,▁▆█▆▆▆
eval/runtime,▆▆▁█▁▅
eval/samples_per_second,▃▃█▁█▄
eval/steps_per_second,▃▃█▁█▄
train/epoch,▁▁▃▃▅▅▆▆████
train/global_step,▁▁▃▃▅▅▆▆████
train/learning_rate,▁▃▆██

0,1
eval/loss,0.49456
eval/rouge-1,0.49138
eval/rouge-2,0.20582
eval/rouge-l,0.45985
eval/runtime,86.2816
eval/samples_per_second,5.783
eval/steps_per_second,0.73
train/epoch,5.0
train/global_step,1947.0
train/learning_rate,2e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111245589951674, max=1.0)…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
1,1.2628,0.577013,0.486024,0.19691,0.454225
2,0.3811,0.670786,0.475736,0.183461,0.437305
3,0.3063,0.573045,0.468057,0.178936,0.434167
4,0.2233,0.649048,0.478028,0.18199,0.441871
5,0.1585,0.68222,0.46301,0.174822,0.419079
6,0.1089,0.709573,0.469665,0.180969,0.43464


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                        
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# asks Jimmy to go to the gym with him to do a workout. They are going to meet at the gym at 3:30 p.m. today.                                                               
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his arms and stomach.      

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person1# is going to send #Person2# to the pulmonologist to get checked for asthma.                                                               
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# and Jimmy are going to work out together. #Person1# wants to work on arms and abs, but Jimmy says they can only do legs on Friday.                                                           
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out hi

[I 2024-09-06 09:25:13,771] Trial 6 finished with value: 0.0 and parameters: {'learning_rate': 7.167326765478392e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 12, 'warmup_ratio': 0.14315176816487674, 'optim': 'adafactor', 'gradient_accumulation_steps': 2, 'lr_scheduler_type': 'cosine', 'fp16': True}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▁▆▁▅▇█▁
eval/rouge-1,█▅▃▆▁▃▃
eval/rouge-2,█▄▂▃▁▃▂
eval/rouge-l,█▅▄▆▁▄▄
eval/runtime,▆▂▅▁▅█▅
eval/samples_per_second,▃▇▃█▄▁▃
eval/steps_per_second,▃▇▃█▄▁▃
train/epoch,▁▁▂▂▄▄▅▅▇▇████
train/global_step,▁▁▂▂▄▄▅▅▇▇████
train/learning_rate,▁█▇▆▄▂

0,1
eval/loss,0.57305
eval/rouge-1,0.46806
eval/rouge-2,0.17894
eval/rouge-l,0.43417
eval/runtime,81.1268
eval/samples_per_second,6.151
eval/steps_per_second,0.777
train/epoch,6.0
train/global_step,4674.0
train/learning_rate,5e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112187554438909, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
0,3.799,0.63857,0.472849,0.181471,0.438021
1,0.4305,0.557299,0.487386,0.20127,0.45222
2,0.3589,0.550129,0.475,0.190996,0.440077
4,0.2655,0.560214,0.491466,0.202117,0.458038
5,0.2221,0.523105,0.494312,0.20201,0.455272
6,0.1865,0.633467,0.490488,0.200202,0.454881
8,0.132,0.68055,0.480744,0.192294,0.4413


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# is having trouble breathing lately. The doctor will send #Person2# to a pulmonologist to check for asthma.                                                                         
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# and Jimmy will meet at the gym at 3:30 to work out. They will work on arms and abs today, but Jimmy wants to do legs on Friday.                                                              
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his arms and

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:  #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                           
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# suggests to Jimmy that they work out later. #Person1#'s legs are sore, so they decide to work on their arms and abs on Friday.                                                                  
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his a

[I 2024-09-06 10:25:43,700] Trial 7 finished with value: 0.0 and parameters: {'learning_rate': 2.69932093997759e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 25, 'warmup_ratio': 0.11909364113541078, 'optim': 'adafactor', 'gradient_accumulation_steps': 4, 'lr_scheduler_type': 'linear', 'fp16': True}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▅▂▂▁▂▁▅█▇▁
eval/rouge-1,▁▆▂█▇█▇▇▄█
eval/rouge-2,▁▇▄█▇▇▆▇▄▇
eval/rouge-l,▁▅▂█▇▆▆▆▂▆
eval/runtime,▅▄█▃▃▄▁▃▂▄
eval/samples_per_second,▄▄▁▆▆▅█▆▇▅
eval/steps_per_second,▄▄▁▆▆▅█▆▇▅
train/epoch,▁▁▂▂▃▃▄▄▅▅▅▅▆▆▇▇████
train/global_step,▁▁▂▂▃▃▄▄▅▅▅▅▆▆▇▇████
train/learning_rate,▁▅██▇▇▆▆▅

0,1
eval/loss,0.5231
eval/rouge-1,0.49431
eval/rouge-2,0.20201
eval/rouge-l,0.45527
eval/runtime,86.115
eval/samples_per_second,5.795
eval/steps_per_second,0.732
train/epoch,9.0
train/global_step,1752.0
train/learning_rate,2e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112470759285821, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
1,1.9876,0.559607,0.479388,0.192807,0.446594
2,0.3699,0.53284,0.49593,0.207988,0.458283
3,0.3203,0.532604,0.487009,0.201121,0.451684
4,0.2684,0.497377,0.482125,0.190057,0.44687
5,0.207,0.539264,0.481212,0.189909,0.448554
6,0.1526,0.651796,0.481705,0.18993,0.443658
7,0.1111,0.704547,0.486354,0.193307,0.447473


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                                        
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# and Jimmy are going to meet at the gym at 3:30 p.m. to work out. #Person1# wants to work on legs and forearms, but Jimmy wants to do arms and abs.                                                      
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work o

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:  #Person2# has been having trouble breathing lately. #Person1# will send #Person2# to a pulmonologist to check for asthma.                                                           
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# suggests to Jimmy to work out with arms and abs because #Person1#'s legs are sore. Jimmy refuses because he is following a weekly schedule.                                                   
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his arms and stomach.  

[I 2024-09-06 11:22:56,457] Trial 8 finished with value: 0.0 and parameters: {'learning_rate': 2.161615123058077e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 14, 'warmup_ratio': 0.2641602830450122, 'optim': 'adamw_torch', 'gradient_accumulation_steps': 1, 'lr_scheduler_type': 'linear', 'fp16': True}. Best is trial 0 with value: 0.0.
  config_data['training']['learning_rate'] = trial.suggest_loguniform('learning_rate', 1e-5, 1e-4)


Running on: cuda:0
---------- Load tokenizer & model ----------
---------- Model Name : knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM ----------
BartConfig {
  "_name_or_path": "knkarthick/MEETING-SUMMARY-BART-LARGE-XSUM-SAMSUM-DIALOGSUM",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "eos_token_ids": [
    2
  ],
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "i

VBox(children=(Label(value='1550.164 MB of 1550.164 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
eval/loss,▃▂▂▁▂▆█▁
eval/rouge-1,▁█▄▂▂▂▄▂
eval/rouge-2,▂█▅▁▁▁▂▁
eval/rouge-l,▂█▅▃▃▁▃▃
eval/runtime,█▅▆▂▁▆▃▁
eval/samples_per_second,▁▄▃▇█▃▅▇
eval/steps_per_second,▁▄▃▇█▃▅▇
train/epoch,▁▁▂▂▃▃▅▅▆▆▇▇████
train/global_step,▁▁▂▂▃▃▅▅▆▆▇▇████
train/learning_rate,▁▄▆█▇▆▅

0,1
eval/loss,0.49738
eval/rouge-1,0.48212
eval/rouge-2,0.19006
eval/rouge-l,0.44687
eval/runtime,68.1773
eval/samples_per_second,7.319
eval/steps_per_second,0.924
train/epoch,7.0
train/global_step,10906.0
train/learning_rate,1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112167893184557, max=1.0…

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------- Make training arguments complete ----------
---------- Make trainer ----------
---------- Make trainer complete ----------


Epoch,Training Loss,Validation Loss,Rouge-1,Rouge-2,Rouge-l
1,1.4966,0.539684,0.496988,0.212763,0.462928
2,0.3635,0.540775,0.477699,0.186917,0.43749
3,0.2885,0.547872,0.4769,0.190649,0.442133
4,0.2223,0.556881,0.490751,0.199874,0.456069


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person1# is going to send #Person2# to a pulmonologist to check for asthma.                                                                      
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# suggests to Jimmy that they go to the gym to work out later. They will meet at the gym at 3:30 p.m. and work on arms and abs.                                                            
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his arm

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person2# has been having trouble breathing lately. #Person1# is going to send #Person2# to a pulmonologist to check for asthma.                                                                      
GOLD:  #Person2# has difficulty breathing. The doctor asks #Person1# about this, and will send #Person2# to a lung specialist.                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------
PRED:     #Person1# suggests to Jimmy that they go to the gym to work out later. They will meet at the gym at 3:30 p.m. and work on arms and abs.                                                            
GOLD:  #Person1# suggests to Jimmy that they go to the gym and convinces him to work out his arm

[I 2024-09-06 12:01:23,878] Trial 9 finished with value: 0.0 and parameters: {'learning_rate': 1.8616690452076175e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 10, 'warmup_ratio': 0.147831780070348, 'optim': 'adafactor', 'gradient_accumulation_steps': 1, 'lr_scheduler_type': 'linear', 'fp16': False}. Best is trial 0 with value: 0.0.


Best trial: 0.0
Best parameters: {'learning_rate': 2.7239932412675317e-05, 'per_device_train_batch_size': 4, 'num_train_epochs': 12, 'warmup_ratio': 0.2534231630584237, 'optim': 'adamw_hf', 'gradient_accumulation_steps': 1, 'lr_scheduler_type': 'linear', 'fp16': True}
