# Abstractive summaries - Train DistilBART on TWEETSUMM dataset

In [1]:
from huggingface_hub import login
import pandas as pd
import numpy as np
import os, time, datetime

from datasets import Dataset, DatasetDict

from transformers import DataCollatorForSeq2Seq, AutoTokenizer, set_seed
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl

import wandb

In [2]:
!pip freeze > requirements_bart.txt

  pid, fd = os.forkpty()


In [3]:
ds_dir = os.path.join(os.getcwd(), 'data')
try:
    HF_TOKEN =  os.environ['HF_TOKEN']
except:
    HF_TOKEN = ""

if 'google.colab' in str(get_ipython()):
    print("Running on Colab")
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    HF_TOKEN = userdata.get('HF_TOKEN')
elif os.environ.get('KAGGLE_KERNEL_RUN_TYPE') != None:
    ds_dir = '/kaggle/input/bertdata2207/'
    # ds_dir="/kaggle/input/bertdata2207/"
    from kaggle_secrets import UserSecretsClient
    print("Running on Kaggle")
    # ds_dir = "/kaggle/input/tweet-data-2106-1512/"
    user_secrets = UserSecretsClient()
    HF_TOKEN = user_secrets.get_secret("HF_TOKEN")
    WANDB_API_KEY = user_secrets.get_secret("WANDB_API_KEY")
    os.environ['WANDB_API_KEY'] = WANDB_API_KEY
    os.makedirs(os.path.join(os.getcwd(), "results"), exist_ok=True)


Running on Kaggle


In [4]:
set_seed(17)

In [5]:
def get_current_time():
    return datetime.datetime.now().strftime("%d%m-%H%M")

In [6]:
run_name = f"bart-abs-{get_current_time()}"

In [7]:
os.environ["WANDB_PROJECT"] = "aiml-thesis-train-test-temp"
os.environ["WANDB_WATCH"] = "all"
wandb.init(settings=wandb.Settings(start_method="thread"), id=run_name)

[34m[1mwandb[0m: Currently logged in as: [33mdawidk5[0m ([33mdawidk5ul[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
login(token=HF_TOKEN)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Load data

In [9]:
print(ds_dir)

/kaggle/input/bertdata2207/


In [10]:
checkpoint_bart = "sshleifer/distilbart-xsum-12-6"

In [11]:
train_df_temp = pd.read_csv(os.path.join(ds_dir,"dials_abs_2607_1312_train_spc.csv"), names=['conv_id','dialogue','summary'], encoding='utf-8', dtype={'conv_id':'string', 'dialogue':'string', 'summary': 'string'})
train_df_temp = train_df_temp.convert_dtypes()
train_df_temp.drop(columns=['conv_id'], inplace=True)
train_df_temp.reset_index(drop=True, inplace=True)

val_df_temp = pd.read_csv(os.path.join(ds_dir,"dials_abs_2607_1312_valid_spc.csv"), names=['conv_id','dialogue','summary'], encoding='utf-8', dtype={'conv_id':'string', 'dialogue':'string', 'summary': 'string'})
val_df_temp = val_df_temp.convert_dtypes()
val_df_temp.drop(columns=['conv_id'], inplace=True)
val_df_temp.reset_index(drop=True, inplace=True)

test_df_temp = pd.read_csv(os.path.join(ds_dir,"dials_abs_2607_1312_test_spc.csv"), names=['conv_id','dialogue','summary'], encoding='utf-8', dtype={'conv_id':'string', 'dialogue':'string', 'summary': 'string'})
test_df_temp = test_df_temp.convert_dtypes()
test_df_temp.reset_index(drop=True, inplace=True)

print(train_df_temp.dtypes)
print(train_df_temp.head())

PD_DATASETS = {'train': train_df_temp, 'validation': val_df_temp, 'test': test_df_temp}

dialogue    string[python]
summary     string[python]
dtype: object
                                            dialogue  \
0  Customer: So neither my iPhone nor my Apple Wa...   
1  Customer: @115850 hi team! i m planning to get...   
2  Customer: @AskAmex Where do I write to address...   
3  Customer: @AmazonHelp @115821 Wow, expected 4 ...   
4  Customer: @GWRHelp I'd rather you spent some t...   

                                             summary  
0  Customer enquired about his Iphone and Apple w...  
1  Customer is eager to know about the replacemen...  
2  Signed up for an AmexCard with Delta but it di...  
3  The customer have a problem. The agent is very...  
4  Customer cannot purchase a train ticket on the...  


In [12]:
tweetsumm_abs = DatasetDict(
    {
        'train': Dataset.from_pandas(train_df_temp),
        'validation': Dataset.from_pandas(val_df_temp),
        'test': Dataset.from_pandas(test_df_temp)
    }
)

In [13]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint_bart)
print(tokenizer)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

BartTokenizerFast(name_or_path='sshleifer/distilbart-xsum-12-6', vocab_size=50265, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': '<mask>'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	1: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	3: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	50264: AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True, special=True),
}




In [14]:
# Source: https://huggingface.co/docs/transformers/en/tasks/summarization

def preprocess_function(examples):
    prefix = "summarize: "
    inputs = [str(prefix) + str(dial) for dial in examples["dialogue"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True) # same params as tweetsumm paper
    labels = tokenizer(text_target=examples["summary"], max_length=80, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [15]:
tokenized_tweetsumm_abs = tweetsumm_abs.map(preprocess_function, batched=True, remove_columns=['dialogue','summary'])
print(tokenized_tweetsumm_abs["train"][1])

Map:   0%|          | 0/867 [00:00<?, ? examples/s]

Map:   0%|          | 0/110 [00:00<?, ? examples/s]

Map:   0%|          | 0/109 [00:00<?, ? examples/s]

{'input_ids': [0, 18581, 3916, 2072, 35, 19458, 35, 787, 1225, 4432, 1096, 20280, 165, 328, 939, 475, 1884, 7, 120, 1257, 1754, 510, 20529, 27785, 24, 924, 15, 5, 998, 24, 34, 158, 360, 5010, 21784, 6, 64, 1717, 3922, 162, 99, 16, 24, 17487, 50118, 45443, 35, 787, 2481, 3897, 2036, 166, 348, 10, 158, 7033, 5010, 714, 114, 5, 6880, 47, 829, 16, 5009, 50, 31559, 4, 37249, 10237, 50118, 44799, 35, 787, 25146, 28780, 5148, 27785, 125, 99, 114, 939, 399, 17, 27, 90, 101, 5, 1152, 8, 236, 7, 671, 24, 50118, 45443, 35, 787, 2481, 3897, 2036, 166, 1979, 75, 28, 441, 7, 3264, 5, 23312, 2886, 4, 286, 55, 335, 15, 1830, 2886, 714, 4, 17161, 352, 3753, 15, 5, 3104, 1373, 259, 35, 1205, 640, 90, 4, 876, 73, 571, 40969, 9380, 530, 4154, 510, 975, 4, 3166, 19954, 877, 110, 2969, 4, 50118, 44799, 35, 787, 25146, 28780, 5148, 2446, 27785, 125, 209, 32, 5567, 15797, 98, 473, 24, 1266, 276, 714, 3253, 13, 209, 25, 157, 50118, 45443, 35, 787, 2481, 3897, 2036, 3216, 6, 30845, 73, 5567, 15797, 32, 45, 4973

## Setup Training Evaluation

In [16]:
!pip install -U nltk

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: nltk
  Attempting uninstall: nltk
    Found existing installation: nltk 3.2.4
    Uninstalling nltk-3.2.4:
      Successfully uninstalled nltk-3.2.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
preprocessing 0.1.13 requires nltk==3.2.4, but you have nltk 3.9.1 which is incompatible.[0m[31m
[0mSuccessfully installed nltk-3.9.1


In [17]:
!pip install evaluate pyrouge rouge_score bert_score meteor

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting pyrouge
  Downloading pyrouge-0.1.3.tar.gz (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.5/60.5 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting meteor
  Downloading meteor-2.0.16-py3-none-any.whl.metadata (8.3 kB)
Collecting bgzip<0.6.0,>=0.5.0 (from meteor)
  Downloading bgzip-0.5.0.tar.gz (100 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.2/100.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting biom-format<3.0.0,>=2.1.15 (from meteor)
  Downloading biom-format-2.1.16.tar.gz (11.7 MB)
[2K     [90m

In [18]:
import evaluate, nltk, csv
rouge = evaluate.load("rouge")
meteor = evaluate.load("meteor")
bertscore = evaluate.load("bertscore")

nltk.download('punkt_tab')

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.93k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /usr/share/nltk_data...


Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

[nltk_data] Downloading package punkt_tab to /usr/share/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [19]:
def compute_metrics_abs(eval_pred):
    predictions, labels = eval_pred
    # Extra line added to address an overflow: https://github.com/huggingface/transformers/issues/22634
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]

    rouge_scores = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True, use_aggregator=True)
    bert_scores = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")
    bert_scores.pop('hashcode')
    result = {
      **{f"rouge/{k}": round(v, 4) for k,v in rouge_scores.items()},
      **{f"bertscore/bertscore-{k}": round(np.mean(v), 4) for k,v in bert_scores.items()},
      'meteor': round(meteor.compute(predictions=decoded_preds, references=decoded_labels)['meteor'], 4),
    }
   
    result["gen_len"] = np.mean(prediction_lens)
    return result


## Train and Evaluate

In [20]:
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_bart)

pytorch_model.bin:   0%|          | 0.00/611M [00:00<?, ?B/s]

In [21]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [22]:
my_batch = data_collator(tokenized_tweetsumm_abs['train'])
assert len(my_batch) == 4 # default setting for the model

In [23]:
EXPERIMENT_PARAMS = []
BASE_PARAMS = {'lr':3e-5, 'batch_size':4, 'epochs': 6}
EXPERIMENT_PARAMS.append(BASE_PARAMS)

In [24]:
LEARN_RATES = (3e-5, 3e-4, 3e-6)
BATCH_SIZES = (4, 2, 8)
EPOCHS = (6,10)

for lr in LEARN_RATES:
    for batch_size in BATCH_SIZES:
        for epoch in EPOCHS:
            if lr == BASE_PARAMS['lr'] and batch_size == BASE_PARAMS['batch_size'] and epoch == BASE_PARAMS['epochs']:
                continue
            experiment = {'lr':lr, 'batch_size':batch_size, 'epochs': epoch}
            EXPERIMENT_PARAMS.append(experiment)

In [25]:
def run_post_training(split, test_details, test_df_temp: pd.DataFrame, tokenizer, experiment, run_name_model, epoch):
    # First line added due to label error, see 
    predictions = np.where(test_details.predictions != -100, test_details.predictions, tokenizer.pad_token_id)
    preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    test_df_temp['response'] = preds
    exp_res = None
    csv_items = {**experiment, **(test_details.metrics)}
    if not exp_res:
        exp_res = {k: list() for k in csv_items.keys()}
    else:
        for k, v in csv_items.items():
            exp_res[k].append(v)

    test_metrics_df = pd.DataFrame(exp_res)
    test_df_temp.convert_dtypes()
    test_metrics_df.convert_dtypes()
    wandb.log({run_name_model: test_details.metrics})
    preds_name = f"{split}_preds_{run_name_model.replace('-','_')}_{epoch}_bart.csv"
    metrics_name =  f"{split}_metrics_{run_name_model.replace('-','_')}_{epoch}_bart.csv"
    test_df_temp.to_csv(os.path.join(os.getcwd(), 'results', preds_name), index=False, header=False, encoding='utf-8', quoting=csv.QUOTE_ALL)
    test_metrics_df.to_csv(os.path.join(os.getcwd(), 'results', metrics_name), index=False, header=True, encoding='utf-8', quoting=csv.QUOTE_ALL)
    # Using wandb documentation: https://docs.wandb.ai/guides/artifacts
    for root, dirs, files in os.walk(os.path.join(os.getcwd(), 'results')):
        for file in files:
            artifact = wandb.Artifact(name=run_name_model, type="predictions")
            artifact.add_file(local_path=os.path.join(root, file), name=file)
            wandb.log_artifact(artifact)


In [26]:
class ExtraCallback(TrainerCallback):
    def __init__(self):
        self.experiment_rows = []
        
#     def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
#         print(len(state.log_history), state.log_history)
#         self.experiment_rows.append(state.log_history[-1])
#         wandb.log({'run_name': args.run_name, **state.log_history[-1]})
        
    def on_epoch_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
        # Save loss from state, log current epoch to wandb
        
        # 'lr': args['learning_rate'], 'batch_size': args['per_device_train_batch_size'], 'max_epochs' args['num_train_epochs']
        wandb.log({'run_name': args.run_name, **state.log_history[-1]})
#         df = pd.DataFrame(self.experiment_rows)
#         df = df.convert_dtypes()
#         df.to_csv(os.path.join('.', 'results', args['run_name'] + ".csv", header=True, index=False))
    
    def on_train_end(self, args, state, control, **kwargs):
        # Save and upload CSVs
        df = pd.DataFrame(state.log_history)
        df = df.convert_dtypes()
        df = df.groupby(['epoch'], as_index=False).mean()
        df.to_csv(os.path.join('.', 'results', args.run_name + ".csv"), header=True, index=False)
        
        
#         for split in ('train', 'validation', 'test'):
#             test_details = trainer.predict(tokenized_tweetsumm_abs[split], metric_key_prefix=split)
#             run_post_training(split, test_details, PD_DATASETS[split], tokenizer, exp, run_name_model, state.epoch)
#         if epoch in EPOCHS:
#             trainer.push_to_hub()
        

In [27]:
exp_res = None
for count, exp in enumerate(EXPERIMENT_PARAMS):
    current_time = get_current_time()
    run_name_model = f"temp-bart-abs-{current_time}-lr-{exp['lr']}-bs-{exp['batch_size']}-maxep-{exp['epochs']}"
    print("Starting experiment", count, run_name_model, "training")
    wandb.run.name = run_name_model
    wandb.run.save()

    training_args = Seq2SeqTrainingArguments(
        output_dir=os.path.join('.', run_model_name),
        eval_strategy="epoch",
        logging_strategy="epoch",
        # logging_steps=100,
        learning_rate=exp['lr'],
        per_device_train_batch_size=exp['batch_size'],
        per_device_eval_batch_size=exp['batch_size'],
        weight_decay=0.01,
        save_strategy="epoch", # "epoch",
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        num_train_epochs=exp['epochs'],
        predict_with_generate=True,
        fp16=True,
        generation_max_length=80,
        push_to_hub=False,
        report_to="none",
        run_name=run_name_model
    )
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_tweetsumm_abs["train"].select(range(0,50)),
        eval_dataset=tokenized_tweetsumm_abs["validation"].select(range(0,10)),
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics_abs,
    )
    trainer.add_callback(ExtraCallback)
    training_start = time.time()
    trainer.train()
    training_end = time.time()
    print("Finished",  run_name_model, "time it took for training:", str(datetime.timedelta(seconds=(training_end-training_start))))



Starting experiment 0 bart-abs-1109-1147-lr-3e-05-bs-4-maxep-6 training


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,4.3755,3.195378,0.2686,0.105,0.234,0.2332,0.892,0.8445,0.8675,0.1558,14.9
2,2.9381,2.833267,0.3263,0.177,0.2735,0.2724,0.9256,0.8588,0.8908,0.2038,16.5
3,2.1267,2.655557,0.2982,0.1563,0.2503,0.2501,0.9117,0.852,0.8807,0.1788,16.1
4,1.6397,2.695941,0.3599,0.1731,0.3065,0.3053,0.9095,0.8654,0.8867,0.2624,21.2
5,1.3337,2.775159,0.3542,0.1423,0.2993,0.2987,0.9082,0.8681,0.8876,0.2339,23.7
6,1.1196,2.82752,0.3305,0.1305,0.274,0.2747,0.8996,0.8609,0.8797,0.2167,22.3


1 [{'loss': 4.3755, 'grad_norm': 14.370349884033203, 'learning_rate': 2.6153846153846157e-05, 'epoch': 1.0, 'step': 13}]


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2 [{'loss': 4.3755, 'grad_norm': 14.370349884033203, 'learning_rate': 2.6153846153846157e-05, 'epoch': 1.0, 'step': 13}, {'eval_loss': 3.195378303527832, 'eval_rouge/rouge1': 0.2686, 'eval_rouge/rouge2': 0.105, 'eval_rouge/rougeL': 0.234, 'eval_rouge/rougeLsum': 0.2332, 'eval_bertscore/bertscore-precision': 0.892, 'eval_bertscore/bertscore-recall': 0.8445, 'eval_bertscore/bertscore-f1': 0.8675, 'eval_meteor': 0.1558, 'eval_gen_len': 14.9, 'eval_runtime': 41.1023, 'eval_samples_per_second': 0.243, 'eval_steps_per_second': 0.073, 'epoch': 1.0, 'step': 13}]
3 [{'loss': 4.3755, 'grad_norm': 14.370349884033203, 'learning_rate': 2.6153846153846157e-05, 'epoch': 1.0, 'step': 13}, {'eval_loss': 3.195378303527832, 'eval_rouge/rouge1': 0.2686, 'eval_rouge/rouge2': 0.105, 'eval_rouge/rougeL': 0.234, 'eval_rouge/rougeLsum': 0.2332, 'eval_bertscore/bertscore-precision': 0.892, 'eval_bertscore/bertscore-recall': 0.8445, 'eval_bertscore/bertscore-f1': 0.8675, 'eval_meteor': 0.1558, 'eval_gen_len': 14

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,1.1546,2.958326,0.384,0.1702,0.3329,0.334,0.9156,0.8726,0.8935,0.2711,23.0
2,0.8704,3.03746,0.3233,0.1273,0.2836,0.2837,0.9053,0.8624,0.8832,0.2278,22.4
3,0.5304,3.362633,0.3829,0.1568,0.3165,0.3145,0.8994,0.868,0.8833,0.2914,26.9
4,0.3636,3.577489,0.3532,0.1277,0.2739,0.2753,0.9005,0.867,0.8833,0.2665,26.4
5,0.2621,3.700132,0.3793,0.1271,0.3132,0.3123,0.9058,0.8712,0.888,0.2809,26.2
6,0.2267,3.815955,0.3675,0.1333,0.3099,0.3087,0.9052,0.8686,0.8864,0.269,24.4


1 [{'loss': 1.1546, 'grad_norm': 11.703929901123047, 'learning_rate': 2.5384615384615386e-05, 'epoch': 1.0, 'step': 13}]
2 [{'loss': 1.1546, 'grad_norm': 11.703929901123047, 'learning_rate': 2.5384615384615386e-05, 'epoch': 1.0, 'step': 13}, {'eval_loss': 2.9583256244659424, 'eval_rouge/rouge1': 0.384, 'eval_rouge/rouge2': 0.1702, 'eval_rouge/rougeL': 0.3329, 'eval_rouge/rougeLsum': 0.334, 'eval_bertscore/bertscore-precision': 0.9156, 'eval_bertscore/bertscore-recall': 0.8726, 'eval_bertscore/bertscore-f1': 0.8935, 'eval_meteor': 0.2711, 'eval_gen_len': 23.0, 'eval_runtime': 2.8473, 'eval_samples_per_second': 3.512, 'eval_steps_per_second': 1.054, 'epoch': 1.0, 'step': 13}]
3 [{'loss': 1.1546, 'grad_norm': 11.703929901123047, 'learning_rate': 2.5384615384615386e-05, 'epoch': 1.0, 'step': 13}, {'eval_loss': 2.9583256244659424, 'eval_rouge/rouge1': 0.384, 'eval_rouge/rouge2': 0.1702, 'eval_rouge/rougeL': 0.3329, 'eval_rouge/rougeLsum': 0.334, 'eval_bertscore/bertscore-precision': 0.9156,

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,0.4589,3.833492,0.3933,0.1513,0.3247,0.3242,0.9045,0.8699,0.8867,0.3029,24.7
2,0.4141,3.750737,0.3755,0.1223,0.3109,0.3103,0.8953,0.8701,0.8824,0.2852,30.5
3,0.289,4.050595,0.3232,0.0953,0.2399,0.2394,0.8898,0.8595,0.8743,0.2339,24.9
4,0.1859,4.168713,0.4318,0.1652,0.3553,0.3535,0.9049,0.8714,0.8877,0.3246,26.1
5,0.1205,4.419093,0.4106,0.1463,0.3363,0.3365,0.8998,0.8709,0.885,0.3027,27.7
6,0.0855,4.40286,0.333,0.0957,0.2725,0.2721,0.8912,0.8624,0.8763,0.252,27.3


1 [{'loss': 0.4589, 'grad_norm': 11.128762245178223, 'learning_rate': 2.54e-05, 'epoch': 1.0, 'step': 25}]
2 [{'loss': 0.4589, 'grad_norm': 11.128762245178223, 'learning_rate': 2.54e-05, 'epoch': 1.0, 'step': 25}, {'eval_loss': 3.833491802215576, 'eval_rouge/rouge1': 0.3933, 'eval_rouge/rouge2': 0.1513, 'eval_rouge/rougeL': 0.3247, 'eval_rouge/rougeLsum': 0.3242, 'eval_bertscore/bertscore-precision': 0.9045, 'eval_bertscore/bertscore-recall': 0.8699, 'eval_bertscore/bertscore-f1': 0.8867, 'eval_meteor': 0.3029, 'eval_gen_len': 24.7, 'eval_runtime': 3.2105, 'eval_samples_per_second': 3.115, 'eval_steps_per_second': 1.557, 'epoch': 1.0, 'step': 25}]
3 [{'loss': 0.4589, 'grad_norm': 11.128762245178223, 'learning_rate': 2.54e-05, 'epoch': 1.0, 'step': 25}, {'eval_loss': 3.833491802215576, 'eval_rouge/rouge1': 0.3933, 'eval_rouge/rouge2': 0.1513, 'eval_rouge/rougeL': 0.3247, 'eval_rouge/rougeLsum': 0.3242, 'eval_bertscore/bertscore-precision': 0.9045, 'eval_bertscore/bertscore-recall': 0.86

In [28]:
def log_csv_wandb(results_path, run_name_model):
    for root, dirs, files in os.walk(results_path):
        for file in files:
            artifact = wandb.Artifact(name=run_name_model, type="predictions")
            artifact.add_file(local_path=os.path.join(root, file), name=file)
            wandb.log_artifact(artifact)

In [29]:
!ls results

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


bart-abs-1109-1147-lr-3e-05-bs-4-maxep-6.csv
bart-abs-1109-1149-lr-3e-05-bs-2-maxep-6.csv
bart-abs-1109-1149-lr-3e-05-bs-4-maxep-10.csv


In [30]:
log_csv_wandb(os.path.join(os.getcwd(), 'results'), run_name_model)

In [31]:
print("Finished all training and evaluation for", run_name)
wandb.finish()

Finished all training and evaluation for bart-abs-1109-1144


VBox(children=(Label(value='0.005 MB of 0.005 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▄▄▅▅▇▇███▁▁▂▂▄▄▅▅▇▇███▁▁▂▂▄▄▅▅▇▇███
eval_bertscore/bertscore-f1,▁▇▅▆▆▄█▅▅▅▇▆▆▅▃▆▆▃
eval_bertscore/bertscore-precision,▁█▅▅▅▃▆▄▃▃▄▄▄▂▁▄▃▁
eval_bertscore/bertscore-recall,▁▅▃▆▇▅█▅▇▇█▇▇▇▅██▅
eval_gen_len,▁▂▂▄▅▄▅▄▆▆▆▅▅█▅▆▇▇
eval_loss,▃▂▁▁▁▂▂▃▄▅▅▆▆▅▇▇██
eval_meteor,▁▃▂▅▄▄▆▄▇▆▆▆▇▆▄█▇▅
eval_rouge/rouge1,▁▃▂▅▅▄▆▃▆▅▆▅▆▆▃█▇▄
eval_rouge/rouge2,▂█▆█▅▄▇▄▆▄▄▄▆▃▁▇▅▁
eval_rouge/rougeL,▁▃▂▅▅▃▇▄▆▃▆▅▆▅▁█▇▃

0,1
epoch,6.0
eval_bertscore/bertscore-f1,0.8763
eval_bertscore/bertscore-precision,0.8912
eval_bertscore/bertscore-recall,0.8624
eval_gen_len,27.3
eval_loss,4.40286
eval_meteor,0.252
eval_rouge/rouge1,0.333
eval_rouge/rouge2,0.0957
eval_rouge/rougeL,0.2725


In [32]:
print("Results uploaded")

Results uploaded
