# Abstractive summaries - Train DistilBART on TWEETSUMM dataset

In [1]:
from huggingface_hub import login
import pandas as pd
import numpy as np
import os, time, datetime

from datasets import Dataset, DatasetDict

from transformers import DataCollatorForSeq2Seq, AutoTokenizer, set_seed
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer

import wandb

In [2]:
!pip freeze

  pid, fd = os.forkpty()


absl-py==1.4.0
accelerate==0.33.0
access==1.1.9
affine==2.4.0
aiobotocore==2.13.2
aiofiles==22.1.0
aiohttp @ file:///home/conda/feedstock_root/build_artifacts/aiohttp_1713964853148/work
aioitertools==0.11.0
aiosignal @ file:///home/conda/feedstock_root/build_artifacts/aiosignal_1667935791922/work
aiosqlite==0.20.0
albucore==0.0.13
albumentations==1.4.14
alembic==1.13.2
altair==5.4.0
annotated-types @ file:///home/conda/feedstock_root/build_artifacts/annotated-types_1716290248287/work
annoy==1.17.3
ansicolors==1.1.8
anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1717693030552/work
apache-beam==2.46.0
aplus==0.11.0
appdirs==1.4.4
archspec @ file:///home/conda/feedstock_root/build_artifacts/archspec_1708969572489/work
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1692818318753/work
argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1695386546427/work
array_record==0.5.1
arrow @ file:///home/conda/fee

In [3]:
ds_dir = '.\\data\\'
try:
    HF_TOKEN =  os.environ['HF_TOKEN']
except:
    HF_TOKEN = ""

if 'google.colab' in str(get_ipython()):
    print("Running on Colab")
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    HF_TOKEN = userdata.get('HF_TOKEN')
elif os.environ.get('KAGGLE_KERNEL_RUN_TYPE') != None:
    ds_dir = '/kaggle/input/bertdata2207/'
    # ds_dir="/kaggle/input/bertdata2207/"
    from kaggle_secrets import UserSecretsClient
    print("Running on Kaggle")
    # ds_dir = "/kaggle/input/tweet-data-2106-1512/"
    user_secrets = UserSecretsClient()
    HF_TOKEN = user_secrets.get_secret("HF_TOKEN")
    WANDB_API_KEY = user_secrets.get_secret("WANDB_API_KEY")
    os.environ['WANDB_API_KEY'] = WANDB_API_KEY


Running on Kaggle


In [4]:
set_seed(17)
os.environ["WANDB_PROJECT"] = "aiml-thesis-train"

In [55]:
wandb.init(settings=wandb.Settings(start_method="thread"))

[34m[1mwandb[0m: Currently logged in as: [33mdawidk5[0m ([33mdawidk5ul[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [56]:
login(token=HF_TOKEN)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Load data

In [7]:
print(ds_dir)

/kaggle/input/bertdata2207/


In [8]:
checkpoint_bart = "sshleifer/distilbart-xsum-12-6"

In [9]:
train_df_temp = pd.read_csv(ds_dir + "dials_abs_2607_1312_train_spc.csv", names=['conv_id','dialogue','summary'], encoding='utf-8', dtype={'conv_id':'string', 'dialogue':'string', 'summary': 'string'})
train_df_temp.convert_dtypes()
train_df_temp.drop(columns=['conv_id'], inplace=True)
train_df_temp.reset_index(drop=True, inplace=True)

val_df_temp = pd.read_csv(ds_dir + "dials_abs_2607_1312_valid_spc.csv", names=['conv_id','dialogue','summary'], encoding='utf-8', dtype={'conv_id':'string', 'dialogue':'string', 'summary': 'string'})
val_df_temp.convert_dtypes()
val_df_temp.drop(columns=['conv_id'], inplace=True)
val_df_temp.reset_index(drop=True, inplace=True)

test_df_temp = pd.read_csv(ds_dir + "dials_abs_2607_1312_test_spc.csv", names=['conv_id','dialogue','summary'], encoding='utf-8', dtype={'conv_id':'string', 'dialogue':'string', 'summary': 'string'})
test_df_temp.convert_dtypes()
test_df_temp.reset_index(drop=True, inplace=True)

print(train_df_temp.dtypes)
print(train_df_temp.head())

dialogue    string[python]
summary     string[python]
dtype: object
                                            dialogue  \
0  Customer: So neither my iPhone nor my Apple Wa...   
1  Customer: @115850 hi team! i m planning to get...   
2  Customer: @AskAmex Where do I write to address...   
3  Customer: @AmazonHelp @115821 Wow, expected 4 ...   
4  Customer: @GWRHelp I'd rather you spent some t...   

                                             summary  
0  Customer enquired about his Iphone and Apple w...  
1  Customer is eager to know about the replacemen...  
2  Signed up for an AmexCard with Delta but it di...  
3  The customer have a problem. The agent is very...  
4  Customer cannot purchase a train ticket on the...  


In [10]:
tweetsumm_abs = DatasetDict(
    {
        'train': Dataset.from_pandas(train_df_temp),
        'validation': Dataset.from_pandas(val_df_temp),
        'test': Dataset.from_pandas(test_df_temp)
    }
)

In [14]:
# bart_tokenizer.max_source_length = 512
# bart_tokenizer.max_target_length = 80
tokenizer = AutoTokenizer.from_pretrained(checkpoint_bart)
print(tokenizer)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

BartTokenizerFast(name_or_path='sshleifer/distilbart-xsum-12-6', vocab_size=50265, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': '<mask>'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	1: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	3: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	50264: AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True, special=True),
}




In [16]:
# Source: https://huggingface.co/docs/transformers/en/tasks/summarization

def preprocess_function(examples):
    prefix = "summarize: "
    inputs = [str(prefix) + str(dial) for dial in examples["dialogue"]]
    with tokenizer.as_target_tokenizer():
        model_inputs = tokenizer(inputs, max_length=512, truncation=True) # same params as tweetsumm paper
        labels = tokenizer(text_target=examples["summary"], max_length=80, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    print(model_inputs.keys())
    return model_inputs

In [48]:
tokenized_tweetsumm_abs = tweetsumm_abs.map(preprocess_function, batched=True, remove_columns=['dialogue','summary'])
print(tokenized_tweetsumm_abs["train"][1])

Map:   0%|          | 0/867 [00:00<?, ? examples/s]



dict_keys(['input_ids', 'attention_mask', 'labels'])


Map:   0%|          | 0/110 [00:00<?, ? examples/s]

dict_keys(['input_ids', 'attention_mask', 'labels'])


Map:   0%|          | 0/109 [00:00<?, ? examples/s]

dict_keys(['input_ids', 'attention_mask', 'labels'])
{'input_ids': [0, 18581, 3916, 2072, 35, 19458, 35, 787, 1225, 4432, 1096, 20280, 165, 328, 939, 475, 1884, 7, 120, 1257, 1754, 510, 20529, 27785, 24, 924, 15, 5, 998, 24, 34, 158, 360, 5010, 21784, 6, 64, 1717, 3922, 162, 99, 16, 24, 17487, 50118, 45443, 35, 787, 2481, 3897, 2036, 166, 348, 10, 158, 7033, 5010, 714, 114, 5, 6880, 47, 829, 16, 5009, 50, 31559, 4, 37249, 10237, 50118, 44799, 35, 787, 25146, 28780, 5148, 27785, 125, 99, 114, 939, 399, 17, 27, 90, 101, 5, 1152, 8, 236, 7, 671, 24, 50118, 45443, 35, 787, 2481, 3897, 2036, 166, 1979, 75, 28, 441, 7, 3264, 5, 23312, 2886, 4, 286, 55, 335, 15, 1830, 2886, 714, 4, 17161, 352, 3753, 15, 5, 3104, 1373, 259, 35, 1205, 640, 90, 4, 876, 73, 571, 40969, 9380, 530, 4154, 510, 975, 4, 3166, 19954, 877, 110, 2969, 4, 50118, 44799, 35, 787, 25146, 28780, 5148, 2446, 27785, 125, 209, 32, 5567, 15797, 98, 473, 24, 1266, 276, 714, 3253, 13, 209, 25, 157, 50118, 45443, 35, 787, 2481, 3897

## Setup Training Evaluation

In [18]:
!pip install -U nltk

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: nltk
  Attempting uninstall: nltk
    Found existing installation: nltk 3.2.4
    Uninstalling nltk-3.2.4:
      Successfully uninstalled nltk-3.2.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
preprocessing 0.1.13 requires nltk==3.2.4, but you have nltk 3.9.1 which is incompatible.[0m[31m
[0mSuccessfully installed nltk-3.9.1


In [19]:
!pip install evaluate pyrouge rouge_score bert_score meteor

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl.metadata (9.3 kB)
Collecting pyrouge
  Downloading pyrouge-0.1.3.tar.gz (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.5/60.5 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting meteor
  Downloading meteor-2.0.15-py3-none-any.whl.metadata (8.3 kB)
Collecting bgzip<0.6.0,>=0.5.0 (from meteor)
  Downloading bgzip-0.5.0.tar.gz (100 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.2/100.2 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting biom-format<3.0.0,>=2.1.15 (from meteor)
  Downloading biom-format-2.1.16.tar.gz (11.7 MB)
[2K     [90m

In [20]:
import evaluate, nltk
rouge = evaluate.load("rouge")
meteor = evaluate.load("meteor")
bertscore = evaluate.load("bertscore")

nltk.download('punkt_tab')

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.93k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /usr/share/nltk_data...


Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

[nltk_data] Downloading package punkt_tab to /usr/share/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [58]:
def compute_metrics_abs(eval_pred):
    predictions, labels = eval_pred
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    with tokenizer.as_target_tokenizer():
        decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
        labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
        prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]

    rouge_scores = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True, use_aggregator=True)
    bert_scores = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")
    bert_scores.pop('hashcode')
    result = {
      **{f"rouge/{k}": round(v, 4) for k,v in rouge_scores.items()},
      **{f"bertscore/bertscore-{k}": round(np.mean(v), 4) for k,v in bert_scores.items()},
      'meteor': round(meteor.compute(predictions=decoded_preds, references=decoded_labels)['meteor'], 4),
    }
   
    result["gen_len"] = np.mean(prediction_lens)
    return result


## Train

In [22]:
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_bart)

pytorch_model.bin:   0%|          | 0.00/611M [00:00<?, ?B/s]

In [23]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [42]:
def check_dataset_lengths(split, tokenizer, dialogues, summaries, max_source_length=512, max_target_length=80):
    issues = []
    for idx, (dialogue, summary) in enumerate(zip(dialogues, summaries)):
        input_encoded = tokenizer(dialogue, truncation=True, max_length=max_source_length)
        summary_encoded = tokenizer(summary, truncation=True, max_length=max_target_length)
        if len(input_encoded['input_ids']) == 0:
            issues.append(f"{split} row {idx}: empty dialogue")
        if len(summary_encoded['input_ids']) == 0:
            issues.append(f"{split} row {idx}: empty summary")
        if None in input_encoded['input_ids'] or None in summary_encoded['input_ids']:
            issues.append(f"{split} row {idx}: None value in tokenized output")
        if len(input_encoded['input_ids']) > max_source_length:
            issues.append(f"{split} row {idx}: dialogue truncation failed")
        if len(summary_encoded['input_ids']) > max_target_length:
            issues.append(f"{split} row {idx}: summary truncation failed")
    return issues

def check_tokens_padding(split, tokenizer, dataset):
    issues = []
    special_tokens = set(tokenizer.all_special_ids)
    for idx, row in enumerate(dataset):
        dialogues = row['input_ids']
        summaries = row['labels']
        if not any(token in special_tokens for token in dialogues):
            issues.append(f"{split} row {idx}: no special tokens in dialogue")
        if tokenizer.pad_token_id in dialogues[:-1]:
            issues.append(f"{split} row {idx}: dialogue not padded")
        if tokenizer.pad_token_id in summaries[:-1]:
            issues.append(f"{split} row {idx}: summary not padded")
    return issues

def check_collator_batches(split, dataset, data_collator, batch_size=4):
    issues = []
    if len(dataset) == 0:
        issues.append("Dataset empty")
    collated = data_collator(dataset)
    print(collated)
    return issues
    for i in range(len(dataset) // batch_size):
        print("abc", i*batch_size, (i+1)*batch_size)
        batch = dataset[i*batch_size : (i+1)*batch_size]
        print("def")
        collated = data_collator(batch)
        print(collated)
        break
        try:
            if any(None in collated[key] for key in collated):
                issues.append(f"{split} batch {i}: None values after collation")
        except Exception as e:
            issues.append(f"{split} batch {i}: collation error - {e}")
    return issues

In [43]:
print(type(tokenized_tweetsumm_abs['train'][0:5]))
print(len(tokenized_tweetsumm_abs['train'][0:5]['input_ids']))
my_batch = data_collator(tokenized_tweetsumm_abs['train'])
# print(my_batch)
# print(data_collator(tokenized_tweetsumm_abs['train'][0:5]['labels']))

<class 'dict'>
5


In [52]:
# os.chdir('temp')
# from debugtokens import check_tokenization_and_length, check_special_tokens_and_padding, check_dataset_and_collator
SPLITS= ('train', 'test', 'validation')
issues = []
for split in SPLITS:
    issues += check_dataset_lengths(split, tokenizer, tweetsumm_abs[split]['dialogue'], tweetsumm_abs[split]['summary'])
    issues += check_tokens_padding(split, tokenizer, tokenized_tweetsumm_abs[split])
    # issues += check_collator_batches(split, tokenized_tweetsumm_abs[split].select(range(0,5)), data_collator)
for issue in issues:
    print(issue)

In [53]:
LEARN_RATES = (3e-5, 3e-4, 3e-6)

In [59]:
for exp_idx in range(0,4):
    
    current_time = datetime.datetime.now().strftime("%d%m-%H%M")
    print(current_time)
    run_name_model = f"distilbart-abs-{current_time}-lr-{LEARN_RATES[exp_idx]}"
    wandb.run.name = run_name_model
    wandb.run.save()

    training_args = Seq2SeqTrainingArguments(
        output_dir=f"trained-distilbart-abs-{current_time[0:4]}",
        eval_strategy="epoch",
        logging_strategy="steps",
        logging_steps=10,
        learning_rate=LEARN_RATES[exp_idx],
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        weight_decay=0.01,
        save_strategy="epoch",
        save_total_limit=6,
        num_train_epochs=6,
        predict_with_generate=True,
        fp16=True,
        generation_max_length=80,
        # generation_config=gen_config,
        push_to_hub=True,
        report_to="wandb",
        run_name=run_name_model
    )
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_tweetsumm_abs["train"],
        eval_dataset=tokenized_tweetsumm_abs["validation"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics_abs,
    )

    training_start = time.time()
    trainer.train()
    training_end = time.time()
    print("Time it took for training:", str(datetime.timedelta(seconds=(training_end-training_start))))
    trainer.push_to_hub(run_name_model)

0409-2214


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss,Rouge/rouge1,Rouge/rouge2,Rouge/rougel,Rouge/rougelsum,Bertscore/bertscore-precision,Bertscore/bertscore-recall,Bertscore/bertscore-f1,Meteor,Gen Len
1,1.809,2.091993,0.4434,0.2026,0.3822,0.3831,0.8967,0.8895,0.8929,0.3945,37.6
2,1.3042,2.053256,0.4645,0.2261,0.4026,0.4043,0.9014,0.8937,0.8974,0.4125,34.290909
3,1.0346,2.203129,0.4675,0.2184,0.4007,0.4019,0.9,0.8926,0.8962,0.4154,35.036364
4,0.6878,2.349631,0.4707,0.2157,0.3984,0.3999,0.8987,0.8915,0.895,0.4128,35.463636
5,0.6453,2.554292,0.4691,0.2162,0.4019,0.4037,0.8977,0.8934,0.8954,0.4174,36.209091


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repeat_ngram_size': 3, 'forced_eos_token_id': 2}
Non-default generation parameters: {'max_length': 62, 'min_length': 11, 'early_stopping': True, 'num_beams': 6, 'length_penalty': 0.5, 'no_repea

RuntimeError: [enforce fail at inline_container.cc:603] . unexpected pos 92096 vs 91988

--- Logging error ---
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/opt/conda/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 28] No space left on device
Call stack:
  File "/opt/conda/lib/python3.10/threading.py", line 973, in _bootstrap
    self._bootstrap_inner()
  File "/opt/conda/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/internal/internal_util.py", line 48, in run
    self._run()
  File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/internal/internal_util.py", line 100, in _run
    self._finish()
  File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/internal/internal.py", line 330, in _finish
    self._sm.finish()
  File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/internal/sender.py", line 1636, in finish
    self._dir_watcher.finish()
  F

In [63]:
!du -h

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b27b08fab60>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7b2783ed4790, raw_cell="!du -h" store_history=True silent=False shell_futures=True cell_id=3d060b14-0189-464b-8db8-9fc1b251c92b>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


4.0K	./wandb/run-20240904_220321-v6rhkfov/tmp/code
8.0K	./wandb/run-20240904_220321-v6rhkfov/tmp
56K	./wandb/run-20240904_220321-v6rhkfov/files
376K	./wandb/run-20240904_220321-v6rhkfov/logs
544K	./wandb/run-20240904_220321-v6rhkfov
548K	./wandb
4.0K	./.virtual_documents
3.5G	./trained-distilbart-abs-0409/checkpoint-434
3.5G	./trained-distilbart-abs-0409/checkpoint-651
3.5G	./trained-distilbart-abs-0409/checkpoint-868
1.3G	./trained-distilbart-abs-0409/checkpoint-1302
3.5G	./trained-distilbart-abs-0409/checkpoint-1085
3.5G	./trained-distilbart-abs-0409/checkpoint-217
20G	./trained-distilbart-abs-0409
20G	.
Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b27b08fab60>> (for post_run_cell), with arguments args (<ExecutionResult object at 7b2783ed49d0, execution_count=63 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7b2783ed4790, raw_cell="!du -h" store_history=True silent=False shell_futures=True cell_id=

BrokenPipeError: [Errno 32] Broken pipe

In [None]:
wandb.finish()