# Abstractive summaries - Train DistilBART on TWEETSUMM dataset

In [1]:
from huggingface_hub import login
import pandas as pd
import numpy as np
import os, time, datetime, shutil

from datasets import Dataset, DatasetDict

from transformers import DataCollatorForSeq2Seq, AutoTokenizer, set_seed
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl

import wandb

In [2]:
!pip freeze > requirements_bart.txt

  pid, fd = os.forkpty()


In [3]:
def get_current_time(underscore=False):
    return datetime.datetime.now().strftime("%d%m-%H%M" if not underscore else "%d%m_%H%M")

In [4]:
run_name = f"bart-abs-{get_current_time()}"
models_dir = os.path.join(os.getcwd(), 'models')
results_dir = os.path.join(os.getcwd(), 'results', 'bart')
ds_dir = os.path.join(os.getcwd(), 'data')
print(run_name)

bart-abs-1509-0313


In [5]:
try:
    HF_TOKEN =  os.environ['HF_TOKEN']
except:
    HF_TOKEN = ""

if 'google.colab' in str(get_ipython()):
    print("Running on Colab")
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    HF_TOKEN = userdata.get('HF_TOKEN')
elif os.environ.get('KAGGLE_KERNEL_RUN_TYPE') != None:
    ds_dir = '/kaggle/input/bertdata2207/'
    from kaggle_secrets import UserSecretsClient
    print("Running on Kaggle")
    user_secrets = UserSecretsClient()
    HF_TOKEN = user_secrets.get_secret("HF_TOKEN")
    WANDB_API_KEY = user_secrets.get_secret("WANDB_API_KEY")
    os.environ['WANDB_API_KEY'] = WANDB_API_KEY
    os.makedirs(os.path.join(os.getcwd(), "results"), exist_ok=True)
    os.makedirs(os.path.join(os.getcwd(), 'results', 'bart'), exist_ok=True)


Running on Kaggle


In [6]:
set_seed(17)

In [7]:
os.environ["WANDB_PROJECT"] = f"aiml-thesis-train-{run_name}"
os.environ["WANDB_WATCH"] = "all"
wandb.init(settings=wandb.Settings(start_method="thread"), id=run_name)

[34m[1mwandb[0m: Currently logged in as: [33mdawidk5[0m ([33mdawidk5ul[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.18.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.17.7
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240915_031356-bart-abs-1509-0313[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mbart-abs-1509-0313[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/dawidk5ul/aiml-thesis-train-bart-abs-1509-0313[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/dawidk5ul/aiml-thesis-train-bart-abs-1509-0313/runs/bart-abs-1509-0313[0m


In [8]:
login(token=HF_TOKEN)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Load data

In [9]:
print(ds_dir)

/kaggle/input/bertdata2207/


In [10]:
checkpoint_bart = "sshleifer/distilbart-xsum-12-6"

In [11]:
def csv_to_pandas(file_name, ds_dir, drop_conv_id=True):
    df = pd.read_csv(os.path.join(ds_dir, file_name), names=['conv_id', 'dialogue', 'summary'], encoding='utf-8', dtype={'conv_id': 'string', 'dialogue': 'string', 'summary': 'string'})
    df = df.convert_dtypes()
    if drop_conv_id:
        df.drop(columns=['conv_id'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

In [12]:
train_df_temp = csv_to_pandas("dials_abs_2607_1312_train_spc.csv", ds_dir)
val_df_temp = csv_to_pandas("dials_abs_2607_1312_valid_spc.csv", ds_dir)
test_df = csv_to_pandas("dials_abs_2607_1312_test_spc.csv", ds_dir, drop_conv_id=False)

print(train_df_temp.dtypes)
print(train_df_temp.head())

dialogue    string[python]
summary     string[python]
dtype: object
                                            dialogue  \
0  Customer: So neither my iPhone nor my Apple Wa...   
1  Customer: @115850 hi team! i m planning to get...   
2  Customer: @AskAmex Where do I write to address...   
3  Customer: @AmazonHelp @115821 Wow, expected 4 ...   
4  Customer: @GWRHelp I'd rather you spent some t...   

                                             summary  
0  Customer enquired about his Iphone and Apple w...  
1  Customer is eager to know about the replacemen...  
2  Signed up for an AmexCard with Delta but it di...  
3  The customer have a problem. The agent is very...  
4  Customer cannot purchase a train ticket on the...  


In [13]:
tweetsumm_abs = DatasetDict(
    {
        'train': Dataset.from_pandas(train_df_temp),
        'validation': Dataset.from_pandas(val_df_temp),
        'test': Dataset.from_pandas(test_df)
    }
)

In [14]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint_bart)
print(tokenizer)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

BartTokenizerFast(name_or_path='sshleifer/distilbart-xsum-12-6', vocab_size=50265, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': '<mask>'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	1: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	3: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	50264: AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True, special=True),
}




In [15]:
# Source: https://huggingface.co/docs/transformers/en/tasks/summarization

def preprocess_function(examples):
    prefix = "summarize: "
    inputs = [str(prefix) + str(dial) for dial in examples["dialogue"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True) # same params as tweetsumm paper
    labels = tokenizer(text_target=examples["summary"], max_length=80, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [16]:
tokenized_tweetsumm_abs = tweetsumm_abs.map(preprocess_function, batched=True, remove_columns=['dialogue','summary'])
print(tokenized_tweetsumm_abs["train"][1])

Map:   0%|          | 0/867 [00:00<?, ? examples/s]

Map:   0%|          | 0/110 [00:00<?, ? examples/s]

Map:   0%|          | 0/109 [00:00<?, ? examples/s]

{'input_ids': [0, 18581, 3916, 2072, 35, 19458, 35, 787, 1225, 4432, 1096, 20280, 165, 328, 939, 475, 1884, 7, 120, 1257, 1754, 510, 20529, 27785, 24, 924, 15, 5, 998, 24, 34, 158, 360, 5010, 21784, 6, 64, 1717, 3922, 162, 99, 16, 24, 17487, 50118, 45443, 35, 787, 2481, 3897, 2036, 166, 348, 10, 158, 7033, 5010, 714, 114, 5, 6880, 47, 829, 16, 5009, 50, 31559, 4, 37249, 10237, 50118, 44799, 35, 787, 25146, 28780, 5148, 27785, 125, 99, 114, 939, 399, 17, 27, 90, 101, 5, 1152, 8, 236, 7, 671, 24, 50118, 45443, 35, 787, 2481, 3897, 2036, 166, 1979, 75, 28, 441, 7, 3264, 5, 23312, 2886, 4, 286, 55, 335, 15, 1830, 2886, 714, 4, 17161, 352, 3753, 15, 5, 3104, 1373, 259, 35, 1205, 640, 90, 4, 876, 73, 571, 40969, 9380, 530, 4154, 510, 975, 4, 3166, 19954, 877, 110, 2969, 4, 50118, 44799, 35, 787, 25146, 28780, 5148, 2446, 27785, 125, 209, 32, 5567, 15797, 98, 473, 24, 1266, 276, 714, 3253, 13, 209, 25, 157, 50118, 45443, 35, 787, 2481, 3897, 2036, 3216, 6, 30845, 73, 5567, 15797, 32, 45, 4973

## Setup Training Evaluation

In [17]:
!pip install -U nltk

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m34.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nltk
  Attempting uninstall: nltk
    Found existing installation: nltk 3.2.4
    Uninstalling nltk-3.2.4:
      Successfully uninstalled nltk-3.2.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
preprocessing 0.1.13 requires nltk==3.2.4, but you have nltk 3.9.1 which is incompatible.[0m[31m
[0mSuccessfully installed nltk-3.9.1


In [18]:
!pip install evaluate pyrouge rouge_score bert_score meteor

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting pyrouge
  Downloading pyrouge-0.1.3.tar.gz (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.5/60.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l- done
[?25hCollecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l- done
[?25hCollecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting meteor
  Downloading meteor-2.0.16-py3-none-any.whl.metadata (8.3 kB)
Collecting bgzip<0.6.0,>=0.5.0 (from meteor)
  Downloading bgzip-0.5.0.tar.gz (100 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.2/100.2 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l- done
[?25hCollecting biom-format<3.0.0,>=2.1.15 (from meteor)
  Downloading biom-format-2.1.16.t

In [19]:
import evaluate, nltk, csv
rouge = evaluate.load("rouge")
meteor = evaluate.load("meteor")
bertscore = evaluate.load("bertscore")

nltk.download('punkt_tab')

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /usr/share/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package omw-1.4 to /usr/share/nltk_data...


Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

[nltk_data] Downloading package punkt_tab to /usr/share/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [20]:
def compute_metrics_abs(eval_pred):
    predictions, labels = eval_pred
    # Extra line added to address an overflow: https://github.com/huggingface/transformers/issues/22634
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]

    rouge_scores = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True, use_aggregator=True)
    bert_scores = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")
    bert_scores.pop('hashcode')
    result = {
      **{f"rouge/{k}": round(v, 4) for k,v in rouge_scores.items()},
      **{f"bertscore/bertscore-{k}": round(np.mean(v), 4) for k,v in bert_scores.items()},
      'meteor': round(meteor.compute(predictions=decoded_preds, references=decoded_labels)['meteor'], 4),
    }
   
    result["gen_len"] = np.mean(prediction_lens)
    return result


## Train and Evaluate

In [21]:
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_bart)

pytorch_model.bin:   0%|          | 0.00/611M [00:00<?, ?B/s]

In [22]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [23]:
EXPERIMENT_PARAMS = []
BASE_PARAMS = {'lr':3e-5, 'batch_size':4, 'epochs': 6}
EXPERIMENT_PARAMS.append(BASE_PARAMS)

In [24]:
my_batch = data_collator(tokenized_tweetsumm_abs['train'])
assert len(my_batch) == 4 # default setting for the model

In [25]:
LEARN_RATES = (3e-5, 3e-4, 3e-6)
BATCH_SIZES = (4, 2, 8)
EPOCHS = (6,10)

for lr in LEARN_RATES:
    for batch_size in BATCH_SIZES:
        for epoch in EPOCHS:
            if lr == BASE_PARAMS['lr'] and batch_size == BASE_PARAMS['batch_size'] and epoch == BASE_PARAMS['epochs']:
                continue
            experiment = {'lr':lr, 'batch_size':batch_size, 'epochs': epoch}
            EXPERIMENT_PARAMS.append(experiment)

In [26]:
def run_post_training(split, test_details, test_df_temp: pd.DataFrame, tokenizer, experiment, run_name_model, epoch, results_dir):
    # First line added due to label error, see 
    predictions = np.where(test_details.predictions != -100, test_details.predictions, tokenizer.pad_token_id)
    preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    test_df_temp['response'] = preds
    exp_res = {**experiment, **(test_details.metrics)}
#     if not exp_res:
#         exp_res = {k: list() for k in csv_items.keys()}
#     else:
#         for k, v in csv_items.items():
#             exp_res[k].append(v)

    test_metrics_df = pd.DataFrame([exp_res])
    test_df_temp = test_df_temp.convert_dtypes()
    test_metrics_df = test_metrics_df.convert_dtypes()
    wandb.log({run_name_model: test_details.metrics})
    preds_name = f"{split}_preds_{run_name_model.replace('-','_')}_{epoch}_bart.csv"
    metrics_name =  f"{split}_metrics_{run_name_model.replace('-','_')}_{epoch}_bart.csv"
    test_df_temp.to_csv(os.path.join(results_dir, preds_name), index=False, header=False, encoding='utf-8', quoting=csv.QUOTE_ALL)
    test_metrics_df.to_csv(os.path.join(results_dir, metrics_name), index=False, header=True, encoding='utf-8', quoting=csv.QUOTE_ALL)
    # Using wandb documentation: https://docs.wandb.ai/guides/artifacts
#     for root, dirs, files in os.walk(results_dir):
#         for file in files:
#             artifact = wandb.Artifact(name=run_name_model, type="predictions")
#             artifact.add_file(local_path=os.path.join(root, file), name=file)
#             wandb.log_artifact(artifact)


In [27]:
class ExtraCallback(TrainerCallback):
    def on_train_end(self, args, state, control, **kwargs):
        # Save and upload CSVs
        super().on_train_end(args, state, control, **kwargs)
        df = pd.DataFrame(state.log_history)
        df = df.convert_dtypes()
        df = df.groupby(['epoch'], as_index=False).sum()
        df.to_csv(os.path.join(results_dir, "log_" + args.run_name + ".csv"), header=True, index=False)

In [28]:
for count, exp in enumerate(EXPERIMENT_PARAMS):
    run_name_model = f"{run_name}-lr-{exp['lr']}-bs-{exp['batch_size']}-maxep-{exp['epochs']}"
    print("=== Starting experiment", count, f"on {get_current_time}:", run_name_model, "training")
    wandb.run.name = run_name_model
    wandb.run.save()

    training_args = Seq2SeqTrainingArguments(
        output_dir=os.path.join(models_dir, run_name_model),
        eval_strategy="epoch",
        logging_strategy="epoch",
        do_train=True,
        do_eval=True,
        learning_rate=exp['lr'],
        per_device_train_batch_size=exp['batch_size'],
        per_device_eval_batch_size=exp['batch_size'],
        weight_decay=0.01,
        save_strategy="epoch",
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        num_train_epochs=exp['epochs'],
        predict_with_generate=True,
        fp16=True,
        generation_max_length=80,
        push_to_hub=False,
        report_to="none",
        run_name=run_name_model,
    )
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_tweetsumm_abs["train"], # .select(range(0,50)),
        eval_dataset=tokenized_tweetsumm_abs["validation"], # .select(range(0,10)),
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics_abs,
    )
    trainer.add_callback(ExtraCallback)
    training_start = time.time()
    trainer.train()
    training_end = time.time()
    print("Finished",  run_name_model, "- time it took for training:", str(datetime.timedelta(seconds=(training_end-training_start))))
    trainer.push_to_hub()
    test_details = trainer.predict(tokenized_tweetsumm_abs['test'], metric_key_prefix='test')
    run_post_training('test', test_details, test_df, tokenizer, exp, run_name_model, exp['epochs'], results_dir)
    shutil.rmtree(models_dir)
    os.makedirs(models_dir)
#     if count == 2:
#         break



=== Starting experiment 0 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-05-bs-4-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,2.3606,2.079204,0.44,0.2028,0.3696,0.3707,0.8966,0.8883,0.8923,0.3866,37.363636
2,1.6544,1.995182,0.4521,0.2171,0.3902,0.3909,0.8991,0.8912,0.895,0.4007,34.663636
3,1.2907,2.061387,0.4661,0.2212,0.399,0.4008,0.9006,0.8929,0.8966,0.4128,35.263636
4,1.0179,2.139555,0.479,0.2295,0.4121,0.4137,0.9024,0.8933,0.8977,0.4142,34.718182
5,0.8112,2.265783,0.4737,0.2237,0.4046,0.405,0.8989,0.8931,0.8959,0.4126,35.427273
6,0.6745,2.361533,0.4759,0.2278,0.407,0.4084,0.898,0.8937,0.8957,0.4311,37.0


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repea

Finished bart-abs-1509-0313-lr-3e-05-bs-4-maxep-6 - time it took for training: 0:14:07.971742


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 1 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-05-bs-4-maxep-10 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,1.2307,2.131164,0.4591,0.2098,0.3873,0.3883,0.8977,0.892,0.8947,0.4036,37.490909
2,0.888,2.205288,0.4516,0.2075,0.3834,0.3857,0.8957,0.8906,0.893,0.3977,35.227273
3,0.784,2.369519,0.4573,0.2136,0.3893,0.3911,0.8972,0.8904,0.8937,0.4003,34.8
4,0.5673,2.621825,0.4714,0.2118,0.3982,0.3996,0.8947,0.8924,0.8934,0.4235,39.272727
5,0.4163,2.915056,0.4683,0.2131,0.4005,0.4023,0.8958,0.8916,0.8935,0.4129,36.754545
6,0.3021,3.096152,0.4648,0.2045,0.3918,0.3935,0.8967,0.893,0.8947,0.4119,37.063636
7,0.2266,3.278224,0.4639,0.2074,0.3907,0.3925,0.8942,0.8941,0.894,0.4203,38.927273
8,0.1684,3.419846,0.4565,0.1964,0.3822,0.3841,0.8934,0.8905,0.8918,0.4035,37.181818
9,0.1347,3.487839,0.4723,0.2189,0.3987,0.4005,0.8954,0.8957,0.8954,0.4308,39.527273
10,0.1124,3.533939,0.4605,0.2065,0.3887,0.3902,0.8931,0.8914,0.8921,0.4157,37.836364


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-05-bs-4-maxep-10 - time it took for training: 0:23:23.357970


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

=== Starting experiment 2 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-05-bs-2-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,1.1204,2.219899,0.4542,0.2173,0.3843,0.3855,0.8945,0.8893,0.8917,0.4072,37.227273
2,0.8222,2.354901,0.4613,0.2095,0.3935,0.3957,0.8994,0.8929,0.896,0.4089,36.881818
3,0.565,2.665166,0.4686,0.2079,0.3905,0.3911,0.8943,0.8941,0.894,0.4207,39.663636
4,0.379,2.923901,0.4614,0.2076,0.3937,0.3951,0.8962,0.8898,0.8928,0.401,34.854545
5,0.2543,3.184867,0.4629,0.2086,0.3988,0.3998,0.8958,0.8914,0.8935,0.4076,36.109091
6,0.1761,3.348588,0.4705,0.2108,0.3877,0.3894,0.8936,0.8936,0.8934,0.4277,38.409091


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-05-bs-2-maxep-6 - time it took for training: 0:16:42.754004


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 3 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-05-bs-2-maxep-10 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.6813,2.458762,0.4636,0.2101,0.3907,0.3921,0.8975,0.8904,0.8937,0.4099,35.072727
2,0.6702,2.537669,0.4448,0.1862,0.3725,0.3735,0.8942,0.8887,0.8913,0.3825,35.727273
3,0.4591,2.876213,0.4533,0.1916,0.3767,0.3778,0.8961,0.8897,0.8928,0.3911,35.209091
4,0.3165,3.212897,0.4519,0.1976,0.3803,0.3806,0.8936,0.891,0.8922,0.4023,37.636364
5,0.2222,3.49714,0.47,0.2049,0.392,0.3924,0.8959,0.8926,0.8941,0.4107,36.554545
6,0.1596,3.640472,0.4607,0.2101,0.3853,0.3879,0.8943,0.8908,0.8924,0.4021,37.227273
7,0.1166,3.782679,0.4759,0.2191,0.4086,0.4106,0.8988,0.8928,0.8956,0.4173,35.572727
8,0.0891,3.938782,0.4677,0.2047,0.3905,0.3925,0.8933,0.8927,0.8929,0.417,38.7
9,0.0695,3.958333,0.4775,0.2116,0.4032,0.4051,0.8981,0.8931,0.8955,0.4228,36.318182
10,0.0592,4.018008,0.4724,0.2094,0.3964,0.3976,0.8964,0.8932,0.8947,0.4217,36.881818


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-05-bs-2-maxep-10 - time it took for training: 0:27:34.011005


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

=== Starting experiment 4 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-05-bs-8-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.6107,2.478363,0.449,0.1974,0.3774,0.3776,0.8943,0.8904,0.8922,0.3981,36.918182
2,0.3993,2.798401,0.4656,0.2145,0.3954,0.3965,0.8975,0.8914,0.8943,0.408,35.136364
3,0.2779,3.056318,0.4669,0.2112,0.3981,0.3995,0.8961,0.8905,0.8931,0.4088,36.054545
4,0.2038,3.240969,0.4639,0.2052,0.3895,0.3904,0.896,0.8949,0.8953,0.4109,37.9
5,0.1606,3.326256,0.4582,0.2063,0.391,0.392,0.8961,0.893,0.8944,0.4033,36.554545
6,0.1282,3.395661,0.4646,0.2089,0.3939,0.3945,0.8956,0.8935,0.8944,0.4132,37.5


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-05-bs-8-maxep-6 - time it took for training: 0:13:28.430654


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 5 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-05-bs-8-maxep-10 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.332,2.932332,0.4383,0.1841,0.3691,0.3701,0.8915,0.8887,0.8899,0.3951,37.218182
2,0.3384,3.041899,0.4611,0.2038,0.3901,0.3918,0.8941,0.8913,0.8925,0.4163,37.190909
3,0.2354,3.279278,0.445,0.1903,0.3776,0.3785,0.8938,0.8895,0.8915,0.394,36.454545
4,0.1736,3.409299,0.4545,0.2,0.3877,0.3885,0.8939,0.8921,0.8928,0.4094,38.381818
5,0.1406,3.518325,0.4634,0.2065,0.394,0.3945,0.898,0.8925,0.8951,0.4032,35.518182
6,0.1108,3.613117,0.4667,0.2075,0.3961,0.3964,0.8966,0.8936,0.8949,0.4155,37.436364
7,0.0906,3.693514,0.4602,0.2002,0.3892,0.3903,0.8922,0.8944,0.8931,0.4116,39.6
8,0.0788,3.728029,0.4704,0.212,0.4,0.4018,0.8941,0.8955,0.8946,0.431,39.872727
9,0.0708,3.760064,0.468,0.2062,0.3979,0.3994,0.8953,0.8948,0.8949,0.4244,38.972727
10,0.0637,3.768922,0.4647,0.2065,0.3953,0.3967,0.8961,0.8941,0.895,0.4195,37.954545


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-05-bs-8-maxep-10 - time it took for training: 0:21:58.666630


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 6 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-0.0003-bs-4-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,2.1071,2.549574,0.4238,0.1753,0.3521,0.3541,0.8943,0.8836,0.8888,0.3359,32.1
2,1.5392,2.586655,0.4296,0.1807,0.3556,0.3551,0.8921,0.886,0.8889,0.3598,35.090909
3,1.0328,2.6952,0.4096,0.1667,0.3444,0.3453,0.8919,0.8826,0.8871,0.3519,33.881818
4,0.62,2.912604,0.4104,0.16,0.3478,0.3487,0.8904,0.8815,0.8858,0.3524,33.427273
5,0.3251,3.324977,0.43,0.1771,0.3591,0.3598,0.8935,0.8861,0.8896,0.3744,34.963636
6,0.1503,3.854074,0.4315,0.1861,0.3638,0.3654,0.8936,0.8875,0.8904,0.3814,35.427273


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-0.0003-bs-4-maxep-6 - time it took for training: 0:13:46.596992


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 7 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-0.0003-bs-4-maxep-10 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,1.722,2.675594,0.4274,0.1866,0.3588,0.3592,0.8936,0.884,0.8886,0.3527,32.954545
2,1.7652,2.732115,0.416,0.1726,0.3511,0.3521,0.8944,0.8818,0.888,0.3352,31.590909
3,1.135,2.937154,0.3752,0.1441,0.3163,0.3158,0.8968,0.8736,0.8849,0.2976,26.4
4,0.9762,3.131121,0.3959,0.1535,0.3344,0.3353,0.8893,0.8777,0.8833,0.3296,33.127273
5,0.7207,3.374106,0.4028,0.1562,0.3388,0.3389,0.8889,0.8818,0.8852,0.3324,34.327273
6,0.3986,3.450408,0.4245,0.1689,0.3493,0.3501,0.892,0.8834,0.8876,0.351,34.472727
7,0.2471,3.831603,0.4096,0.1536,0.3384,0.3389,0.8922,0.8814,0.8867,0.3376,32.790909
8,0.1613,4.243933,0.4201,0.1621,0.346,0.347,0.8921,0.8815,0.8866,0.3503,33.3
9,0.0989,4.478382,0.4115,0.1499,0.3394,0.3408,0.8904,0.8825,0.8863,0.3409,34.0
10,0.0644,4.748236,0.4015,0.1493,0.329,0.3294,0.8894,0.8807,0.8849,0.3397,33.2


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-0.0003-bs-4-maxep-10 - time it took for training: 0:22:42.214602


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 8 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-0.0003-bs-2-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,2.4741,4.026882,0.2771,0.0691,0.2057,0.2053,0.8702,0.8596,0.8648,0.233,39.0
2,3.0848,3.997837,0.2554,0.0651,0.2183,0.2183,0.8646,0.8589,0.8617,0.2022,29.136364
3,1.9491,4.452434,0.2722,0.0714,0.2029,0.2031,0.8612,0.8618,0.8615,0.2582,47.0
4,1.0603,5.40223,0.2465,0.0593,0.2071,0.2071,0.8464,0.858,0.8521,0.2294,42.0
5,0.5921,6.114592,0.3035,0.072,0.2428,0.2429,0.8724,0.8571,0.8646,0.2108,29.0
6,0.3762,6.856692,0.3035,0.072,0.2428,0.2429,0.8724,0.8571,0.8646,0.2108,29.0


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-0.0003-bs-2-maxep-6 - time it took for training: 0:16:11.929453


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

=== Starting experiment 9 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-0.0003-bs-2-maxep-10 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,2.1693,4.520563,0.2693,0.0703,0.2315,0.232,0.8864,0.8585,0.8722,0.2188,29.0
2,1.3403,5.039476,0.3061,0.0778,0.251,0.2513,0.8875,0.864,0.8755,0.239,32.0
3,1.1783,5.133874,0.2426,0.0523,0.1835,0.1835,0.8501,0.8566,0.8533,0.248,52.0
4,0.8203,5.667829,0.3347,0.0996,0.2675,0.2678,0.8793,0.8662,0.8727,0.2663,27.0
5,0.623,6.173223,0.2961,0.0668,0.2313,0.2314,0.8628,0.8608,0.8617,0.2421,52.0
6,0.5051,6.101051,0.2953,0.0542,0.2213,0.2211,0.8685,0.8588,0.8636,0.2403,34.0
7,0.4004,6.884843,0.2613,0.0803,0.214,0.2142,0.8711,0.8469,0.8588,0.2102,26.0
8,0.3371,7.298699,0.2132,0.0353,0.1717,0.1717,0.8605,0.8522,0.8563,0.1839,27.0
9,0.2954,7.469191,0.244,0.063,0.1986,0.1986,0.8544,0.8608,0.8575,0.211,52.0
10,0.2576,7.699864,0.2439,0.0504,0.2065,0.2067,0.8544,0.8581,0.8562,0.229,44.0


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-0.0003-bs-2-maxep-10 - time it took for training: 0:26:54.945435


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

=== Starting experiment 10 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-0.0003-bs-8-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,1.3455,4.890031,0.3002,0.06,0.2217,0.2214,0.8741,0.8561,0.8649,0.2198,32.0
2,0.7475,5.617082,0.2599,0.0592,0.202,0.2017,0.8553,0.8626,0.8589,0.2583,43.0
3,0.5078,6.195126,0.2722,0.0714,0.2029,0.2031,0.8612,0.8618,0.8615,0.2582,44.0
4,0.3719,6.678996,0.3035,0.072,0.2428,0.2429,0.8724,0.8571,0.8646,0.2108,29.0
5,0.3026,7.020516,0.3101,0.0691,0.2302,0.2302,0.8569,0.8665,0.8616,0.2291,43.0
6,0.2546,7.254346,0.3097,0.0856,0.2463,0.2464,0.8589,0.8656,0.8622,0.2246,36.0


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-0.0003-bs-8-maxep-6 - time it took for training: 0:12:39.278359


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

=== Starting experiment 11 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-0.0003-bs-8-maxep-10 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.6961,5.508416,0.2572,0.0691,0.1962,0.1962,0.8672,0.8617,0.8644,0.2158,34.0
2,0.6838,5.749358,0.2975,0.0945,0.2493,0.2495,0.8739,0.8626,0.8681,0.2433,27.0
3,0.5113,6.021246,0.2722,0.0714,0.2029,0.2031,0.8612,0.8618,0.8615,0.2582,44.0
4,0.4108,6.595687,0.2916,0.064,0.2118,0.2121,0.8678,0.8659,0.8668,0.2243,47.0
5,0.3585,6.754153,0.2554,0.0561,0.198,0.1977,0.8531,0.8633,0.8581,0.2483,42.0
6,0.3094,6.995618,0.3041,0.0711,0.2307,0.2305,0.8646,0.8658,0.8652,0.2861,42.0
7,0.281,7.118096,0.2582,0.0781,0.2156,0.2154,0.8771,0.8626,0.8697,0.1855,29.0
8,0.261,7.271702,0.3097,0.0856,0.2463,0.2464,0.8589,0.8656,0.8622,0.2246,36.0
9,0.2415,7.444592,0.2906,0.0847,0.2272,0.2274,0.8671,0.8567,0.8618,0.1991,27.0
10,0.2228,7.582563,0.2532,0.0528,0.2067,0.2071,0.8514,0.8621,0.8567,0.2303,46.490909


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-0.0003-bs-8-maxep-10 - time it took for training: 0:20:36.019955


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

=== Starting experiment 12 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-06-bs-4-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.5854,5.793724,0.2578,0.0526,0.1861,0.1862,0.8466,0.8559,0.8512,0.265,55.0
2,0.5363,5.947269,0.2723,0.0677,0.226,0.2265,0.8613,0.855,0.8581,0.2229,30.0
3,0.4911,6.040256,0.3064,0.0749,0.2182,0.2185,0.865,0.8652,0.8651,0.2255,37.0
4,0.4651,6.104637,0.3064,0.0749,0.2182,0.2185,0.865,0.8652,0.8651,0.2255,37.0
5,0.4476,6.165938,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
6,0.4415,6.178806,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-06-bs-4-maxep-6 - time it took for training: 0:13:45.190587


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 13 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-06-bs-4-maxep-10 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.4698,6.033186,0.2584,0.0526,0.1868,0.1869,0.8466,0.8559,0.8512,0.2654,55.0
2,0.4866,6.164386,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
3,0.4497,6.226794,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
4,0.4248,6.303143,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
5,0.4054,6.402435,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
6,0.3937,6.467465,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
7,0.3833,6.503994,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
8,0.3761,6.526992,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
9,0.374,6.545361,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
10,0.3686,6.551795,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-06-bs-4-maxep-10 - time it took for training: 0:22:27.861745


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 14 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-06-bs-2-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.3628,6.231353,0.2519,0.0551,0.191,0.191,0.8502,0.8569,0.8535,0.2501,50.8
2,0.3799,6.449799,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
3,0.4173,6.455257,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
4,0.3921,6.528257,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
5,0.3833,6.558248,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
6,0.378,6.572188,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-06-bs-2-maxep-6 - time it took for training: 0:16:07.499095


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 15 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-06-bs-2-maxep-10 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.3072,6.460134,0.2705,0.0674,0.225,0.2255,0.861,0.855,0.8579,0.2231,30.454545
2,0.3542,6.682418,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
3,0.3893,6.646208,0.3097,0.0856,0.2463,0.2464,0.8585,0.8656,0.862,0.2246,36.0
4,0.363,6.728342,0.3097,0.0856,0.2463,0.2464,0.8585,0.8656,0.862,0.2246,36.0
5,0.3521,6.77418,0.3097,0.0856,0.2463,0.2464,0.8585,0.8656,0.862,0.2246,36.0
6,0.3425,6.810714,0.2439,0.0504,0.2065,0.2067,0.8544,0.8581,0.8562,0.229,45.0
7,0.3306,6.843299,0.2522,0.07,0.2096,0.2101,0.8377,0.8632,0.8502,0.2476,47.936364
8,0.3259,6.865322,0.2515,0.0697,0.2094,0.21,0.8374,0.8631,0.85,0.2474,48.0
9,0.3228,6.862096,0.2515,0.0697,0.2094,0.21,0.8374,0.8631,0.85,0.2474,48.0
10,0.3193,6.871887,0.2515,0.0697,0.2094,0.21,0.8374,0.8631,0.85,0.2474,48.0


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-06-bs-2-maxep-10 - time it took for training: 0:27:40.860328


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 16 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-06-bs-8-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.2815,6.62452,0.323,0.081,0.2516,0.2521,0.8764,0.8641,0.8702,0.2583,36.0
2,0.3942,6.68904,0.2515,0.0697,0.2094,0.21,0.8374,0.8631,0.85,0.2474,48.0
3,0.3862,6.669157,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
4,0.3708,6.662379,0.2439,0.0504,0.2065,0.2067,0.8544,0.8581,0.8562,0.229,45.0
5,0.3679,6.679515,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
6,0.3629,6.680861,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-06-bs-8-maxep-6 - time it took for training: 0:12:44.728306


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

=== Starting experiment 17 on <function get_current_time at 0x7fae739435b0>: bart-abs-1509-0313-lr-3e-06-bs-8-maxep-10 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.2585,6.718752,0.323,0.081,0.2516,0.2521,0.8764,0.8641,0.8702,0.2583,36.0
2,0.3824,6.760242,0.2515,0.0697,0.2094,0.21,0.8374,0.8631,0.85,0.2474,48.0
3,0.3739,6.740879,0.3111,0.0793,0.2212,0.2213,0.8659,0.864,0.8649,0.228,36.0
4,0.3565,6.731381,0.2439,0.0504,0.2065,0.2067,0.8544,0.8581,0.8562,0.229,45.0
5,0.3514,6.76477,0.2553,0.0701,0.2111,0.2116,0.839,0.8634,0.8509,0.2466,47.454545
6,0.3434,6.76889,0.271,0.0725,0.2029,0.2034,0.8612,0.8618,0.8614,0.2584,45.0
7,0.335,6.784513,0.2722,0.0714,0.2029,0.2031,0.8612,0.8618,0.8615,0.2582,45.0
8,0.337,6.787004,0.2601,0.0623,0.1995,0.1999,0.8579,0.8595,0.8586,0.2431,45.0
9,0.3328,6.795002,0.2515,0.0697,0.2094,0.21,0.8374,0.8631,0.85,0.2474,48.0
10,0.34,6.793319,0.2524,0.0705,0.2098,0.2104,0.8386,0.8631,0.8506,0.2495,47.863636


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_l

Finished bart-abs-1509-0313-lr-3e-06-bs-8-maxep-10 - time it took for training: 0:21:17.978019


Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

In [29]:
def log_csv_wandb(results_path, run_name_model):
    for root, dirs, files in os.walk(results_path):
        for file in files:
            artifact = wandb.Artifact(name=run_name_model, type="predictions")
            artifact.add_file(local_path=os.path.join(root, file), name=file)
            wandb.log_artifact(artifact)

In [30]:
!ls

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


__notebook__.ipynb  models  requirements_bart.txt  results  wandb


In [31]:
log_csv_wandb(results_dir, run_name_model)

In [32]:
print("Finished all training and evaluation for", run_name)
wandb.finish()

Finished all training and evaluation for bart-abs-1509-0313


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 🚀 View run [33mbart-abs-1509-0313[0m at: [34m[4mhttps://wandb.ai/dawidk5ul/aiml-thesis-train-bart-abs-1509-0313/runs/bart-abs-1509-0313[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/dawidk5ul/aiml-thesis-train-bart-abs-1509-0313[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 54 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20240915_031356-bart-abs-1509-0313/logs[0m


In [33]:
print("Results uploaded")

Results uploaded
