In [1]:
from utils import *

from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback
from datasets import load_dataset, concatenate_datasets, Dataset
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

In [2]:
def one_hot_encode(y, nb_classes=4):
    if not isinstance(y, np.ndarray):
        y = np.expand_dims(np.array(y), 0)
    res = np.eye(nb_classes)[np.array(y).reshape(-1)]
    return res.reshape(list(y.shape)+[nb_classes])[0]

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=250)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=None)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1.mean(),
        'precision': precision.mean(),
        'recall': recall.mean()
    }

def acc_at_k(y_true, y_pred, k=2):
    y_true = torch.tensor(y_true) if type(y_true) != torch.Tensor else y_true
    y_pred = torch.tensor(y_pred) if type(y_pred) != torch.Tensor else y_pred
    total = len(y_true)
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    out_weights, out_idx = torch.topk(y_pred, k=k, dim=-1)
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) Loss
    :param logits: (batch, *)
    :param target: (batch, *) same shape as logits, each item must be a valid distribution: target[i, :].sum() == 1.
    """
    logprobs = torch.nn.functional.log_softmax(logits.view(logits.shape[0], -1), dim=1)
    batchloss = - torch.sum(target.view(target.shape[0], -1) * logprobs, dim=1)
    if reduction == 'none':
        return batchloss
    elif reduction == 'mean':
        return torch.mean(batchloss)
    elif reduction == 'sum':
        return torch.sum(batchloss)
    else:
        raise NotImplementedError('Unsupported reduction mode.')

def compute_metrics_w_soft_target(pred):
    labels = pred.label_ids
    preds = pred.predictions
    acc = acc_at_k(labels, preds, k=2)
    return {
        'accuracy': acc,
    }

class Trainer_w_soft_target(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        loss = CEwST_loss(logits, labels)
        if return_outputs:
            return loss, outputs
        return loss
    
class DefaultCollator:
    def __init__(self):
        pass
    def __call__(self, batch):
        return torch.utils.data.dataloader.default_collate(batch)

In [3]:
from sklearn.datasets import fetch_20newsgroups

def get_20NG_test_dataset():
    cats = [
        'talk.politics.mideast',                                # Wolrd 0
        'rec.sport.hockey', 'rec.sport.baseball',               # Sports 1
        # 'misc.forsale',                                       # Business 2
        'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space', # Sci/Tech 3
    ]

    dataset = fetch_20newsgroups(
        subset='all',
        categories=cats,
        remove=('headers', 'footers', 'quotes')
    )

    df = pd.DataFrame([dataset.data, dataset.target]).T
    df.rename(columns={0:'text', 1: 'label'}, inplace=True)

    mapper = {
        0: 1,
        1: 1,
        2: 3,
        3: 3,
        4: 3,
        5: 3,
        6: 0,
    }

    df.label = df.label.map(mapper)
    df.text = df.text.replace('\n', ' ', regex=True).str.strip()

    test_dataset = Dataset.from_pandas(df)
    
    return test_dataset

In [4]:
# ['ORIG', 'INV', 'SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']
# ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']

In [5]:
MODEL_NAMES = ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']

In [6]:
use_pretrain = False

results = []
for MODEL_NAME in MODEL_NAMES:
    for t in ['ORIG', 'INV', 'SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']: 
                        
        soft_target = False
        eval_only = False
        
        checkpoint = 'pretrained/' + MODEL_NAME + "-ag_news-ORIG+" + t 
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        
        if t == 'ORIG':
            train_dataset = load_dataset('ag_news', split='train')
        else:
            # load custom data    
            text = npy_load("./assets/AG_NEWS/" + t + "/text.npy")
            label = npy_load("./assets/AG_NEWS/" + t + "/label.npy")
            if len(label.shape) > 1:
                df = pd.DataFrame({'text': text, 'label': label.tolist()})
                df.text = df.text.astype(str)
                df.label = df.label.map(lambda y: np.array(y))
            else:
                df = pd.DataFrame({'text': text, 'label': label})
                df.text = df.text.astype(str)
                df.label = df.label.astype(object)
            train_dataset = Dataset.from_pandas(df)  
            
            # load orig data
            orig_dataset = load_dataset('ag_news', split='train')
            df = orig_dataset.to_pandas()
            df = df[df.columns[::-1]]
            df.text = df.text.astype(str)
            if len(label.shape) > 1:
                df.label = df.label.map(one_hot_encode)
            else:
                df.label = df.label.astype(object)
            orig_dataset = Dataset.from_pandas(df)
            
            # merge orig + custom data
            train_dataset = concatenate_datasets([orig_dataset, train_dataset])
            train_dataset.shuffle()
            
        if use_pretrain and os.path.exists(checkpoint):
            print('loading {}...'.format(checkpoint))
            MODEL_NAME = checkpoint
            eval_only = True
                
        dataset_dict = train_dataset.train_test_split(
            test_size = 0.05,
            train_size = 0.95,
            shuffle = True
        )
        train_dataset = dataset_dict['train']
        eval_dataset = dataset_dict['test']
        test_dataset = load_dataset('ag_news')['test']
        test_dataset_20NG = get_20NG_test_dataset()
        
        # # reduce training time
        # n = 10000
        # train_dataset = Dataset.from_dict(train_dataset[:n])
        # eval_dataset = Dataset.from_dict(eval_dataset[:n])
        # test_dataset = Dataset.from_dict(test_dataset[:n])

        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=4).to(device)
                
        train_dataset = train_dataset.map(tokenize, batched=True, batch_size=len(train_dataset))
        eval_dataset = eval_dataset.map(tokenize, batched=True, batch_size=len(eval_dataset))
        test_dataset = test_dataset.map(tokenize, batched=True, batch_size=len(test_dataset))
        test_dataset_20NG = test_dataset_20NG.map(tokenize, batched=True, batch_size=len(test_dataset_20NG))
        train_dataset.rename_column_('label', 'labels')
        eval_dataset.rename_column_('label', 'labels')
        test_dataset.rename_column_('label', 'labels')
        test_dataset_20NG.rename_column_('label', 'labels')
        train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        eval_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset_20NG.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        
        if len(np.array(train_dataset['labels']).shape) > 1:
            soft_target = True

        train_batch_size = 3
        eval_batch_size = 32
        num_epoch = 3
        gradient_accumulation_steps=1
        max_steps = int((len(train_dataset) * num_epoch / gradient_accumulation_steps) / train_batch_size)
        
        training_args = TrainingArguments(
            seed=1,
            # adafactor=True,
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps, 
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=2000,
            logging_first_step=True,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            greater_is_better=True,
            evaluation_strategy="steps",
            run_name=checkpoint,
            label_names=['World', 'Sports', 'Business', 'Sci/Tech']
        )
        
        if soft_target:
            trainer = Trainer_w_soft_target(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics_w_soft_target,
                train_dataset=train_dataset,
                eval_dataset=eval_dataset,
                data_collator=DefaultCollator(),
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )
        else: 
            trainer = Trainer(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics,
                train_dataset=train_dataset,
                eval_dataset=test_dataset,
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )

        if not eval_only:
            trainer.train()
            
        trainer.compute_metrics = compute_metrics
        
        # test with ORIG data
        trainer.eval_dataset = test_dataset
        out_orig = trainer.evaluate()
        out_orig['run'] = checkpoint
        out_orig['test'] = "ORIG"
        print('ORIG for {}\n{}'.format(checkpoint, out_orig))
        
        # test with 20NG data
        trainer.eval_dataset = test_dataset_20NG
        out_20NG = trainer.evaluate()
        out_20NG['run'] = checkpoint
        out_orig['test'] = "20NG"
        print('20NG for {}\n{}'.format(checkpoint, out_20NG))
        
        results.append(out_orig)
        results.append(out_20NG)
        
        # run.finish()

Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

rename_column_ is deprecated and will be removed in the next major version of datasets. Use the dataset.rename_column method instead.





W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1000,0.9889,0.352838,0.888421,0.888068,0.8881,0.888421,99.0976,76.692
2000,0.3947,0.552767,0.890132,0.890381,0.894159,0.890132,99.5474,76.346
3000,0.4638,0.525628,0.891053,0.890309,0.892337,0.891053,99.5046,76.378
4000,0.4932,0.410898,0.908553,0.908635,0.909427,0.908553,99.3925,76.465
5000,0.4752,0.471517,0.905789,0.904867,0.906456,0.905789,99.3379,76.507
6000,0.4468,0.403315,0.909737,0.909406,0.909337,0.909737,99.3482,76.499
7000,0.4603,0.634986,0.885789,0.884347,0.887594,0.885789,99.4079,76.453
8000,0.4564,0.472028,0.900658,0.900565,0.902545,0.900658,99.447,76.423
9000,0.4943,0.483188,0.902895,0.902507,0.907761,0.902895,99.3957,76.462
10000,0.5022,0.555143,0.888158,0.887378,0.892664,0.888158,99.4508,76.42


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+ORIG
{'eval_loss': 0.4033150374889374, 'eval_accuracy': 0.9097368421052632, 'eval_f1': 0.9094061072012141, 'eval_precision': 0.9093367410880967, 'eval_recall': 0.9097368421052632, 'eval_runtime': 99.7262, 'eval_samples_per_second': 76.209, 'epoch': 0.42, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+ORIG', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+ORIG
{'eval_loss': 0.7385788559913635, 'eval_accuracy': 0.837037037037037, 'eval_f1': 0.6280575721260904, 'eval_precision': 0.6296224723386277, 'eval_recall': 0.6327219457577359, 'eval_runtime': 90.5726, 'eval_samples_per_second': 76.016, 'epoch': 0.42, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+ORIG'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.





Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1000,1.2522,0.803688,0.831711,0.828349,0.840896,0.831711,99.6863,76.239
2000,0.514,0.469749,0.880263,0.880172,0.889672,0.880263,99.4955,76.385
3000,0.4907,0.438498,0.896053,0.895857,0.900809,0.896053,99.374,76.479
4000,0.5011,0.525659,0.894605,0.893364,0.901063,0.894605,99.4017,76.457
5000,0.4918,0.463651,0.907237,0.907336,0.908788,0.907237,99.3528,76.495
6000,0.4834,0.412154,0.907895,0.90711,0.908811,0.907895,99.3253,76.516
7000,0.5111,0.476239,0.900395,0.900437,0.905031,0.900395,99.2849,76.547
8000,0.4654,0.474432,0.908816,0.908096,0.908215,0.908816,99.296,76.539
9000,0.4856,0.418811,0.915,0.915012,0.916876,0.915,99.2699,76.559
10000,0.49,0.413746,0.914342,0.914438,0.915978,0.914342,99.3279,76.514


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+INV
{'eval_loss': 0.3881412446498871, 'eval_accuracy': 0.9196052631578947, 'eval_f1': 0.9194067849580483, 'eval_precision': 0.9199058408527168, 'eval_recall': 0.9196052631578948, 'eval_runtime': 99.5063, 'eval_samples_per_second': 76.377, 'epoch': 0.28, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+INV', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+INV
{'eval_loss': 0.6264991164207458, 'eval_accuracy': 0.8460421205519245, 'eval_f1': 0.6470054431060661, 'eval_precision': 0.6649069266909127, 'eval_recall': 0.6313812166447672, 'eval_runtime': 90.4282, 'eval_samples_per_second': 76.138, 'epoch': 0.28, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+INV'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.2488,0.868032,0.710995,157.1497,76.36
2000,0.6725,0.632342,0.762249,156.8552,76.504
3000,0.6113,0.578718,0.774072,156.9417,76.462
4000,0.5857,0.618272,0.771641,156.9022,76.481
5000,0.5715,0.652651,0.76731,156.9068,76.479
6000,0.5692,0.57843,0.773788,156.9261,76.469
7000,0.5976,0.590061,0.775067,156.947,76.459
8000,0.5547,0.635154,0.780247,156.8414,76.51
9000,0.5599,0.583565,0.7565,156.903,76.48
10000,0.5663,0.577659,0.764488,156.9193,76.472


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+SIB
{'eval_loss': 28.322521209716797, 'eval_accuracy': 0.916578947368421, 'eval_f1': 0.9163817606666912, 'eval_precision': 0.9167379706623926, 'eval_recall': 0.916578947368421, 'eval_runtime': 99.4292, 'eval_samples_per_second': 76.436, 'epoch': 0.32, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+SIB', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+SIB
{'eval_loss': 30.492338180541992, 'eval_accuracy': 0.837763253449528, 'eval_f1': 0.6353210078306538, 'eval_precision': 0.6499897335014365, 'eval_recall': 0.6299928576520968, 'eval_runtime': 90.3602, 'eval_samples_per_second': 76.195, 'epoch': 0.32, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+SIB'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.2171,0.762999,0.795851,157.5456,76.168
2000,0.5577,0.51346,0.832335,157.4458,76.217
3000,0.5354,0.539572,0.833888,157.4056,76.236
4000,0.507,0.556445,0.832132,157.3859,76.246
5000,0.5071,0.524399,0.840626,157.7718,76.059
6000,0.4972,0.51433,0.824058,157.9847,75.957
7000,0.5329,0.468887,0.832444,157.9893,75.955
8000,0.4691,0.541108,0.835458,157.8782,76.008
9000,0.4998,0.526469,0.846403,157.9671,75.965
10000,0.5165,0.501617,0.838525,157.8888,76.003


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+INVSIB
{'eval_loss': 29.862548828125, 'eval_accuracy': 0.9123684210526316, 'eval_f1': 0.9121085818185244, 'eval_precision': 0.9124523394613038, 'eval_recall': 0.9123684210526315, 'eval_runtime': 99.7859, 'eval_samples_per_second': 76.163, 'epoch': 0.28, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+INVSIB', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+INVSIB
{'eval_loss': 32.26008224487305, 'eval_accuracy': 0.8437182280319535, 'eval_f1': 0.6348138177857947, 'eval_precision': 0.6405850390711187, 'eval_recall': 0.6333807046505737, 'eval_runtime': 90.6306, 'eval_samples_per_second': 75.968, 'epoch': 0.28, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+INVSIB'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.2485,0.875998,0.692331,157.6587,76.114
2000,0.6675,0.587132,0.777458,157.2028,76.335
3000,0.5886,0.577437,0.748185,157.168,76.351
4000,0.5783,0.648647,0.771997,156.9974,76.434
5000,0.5659,0.580755,0.790991,156.8202,76.521
6000,0.5609,0.578808,0.77633,156.7701,76.545
7000,0.578,0.586657,0.77704,156.768,76.546
8000,0.5542,0.562955,0.803224,156.8725,76.495
9000,0.551,0.58366,0.765461,156.8697,76.497
10000,0.5437,0.567263,0.773353,157.0955,76.387


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+TextMix
{'eval_loss': 30.897172927856445, 'eval_accuracy': 0.9222368421052631, 'eval_f1': 0.9222287729685831, 'eval_precision': 0.9225743264714401, 'eval_recall': 0.9222368421052632, 'eval_runtime': 99.1179, 'eval_samples_per_second': 76.676, 'epoch': 0.32, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+TextMix', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+TextMix
{'eval_loss': 35.25430679321289, 'eval_accuracy': 0.8827886710239652, 'eval_f1': 0.6527804137681926, 'eval_precision': 0.6748044403786122, 'eval_recall': 0.6335572844876527, 'eval_runtime': 89.9611, 'eval_samples_per_second': 76.533, 'epoch': 0.32, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+TextMix'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.0902,0.871683,0.552291,237.1051,75.916
2000,0.6741,0.539954,0.635341,236.7881,76.017
3000,0.5455,0.494252,0.648102,236.5194,76.104
4000,0.5033,0.480785,0.641924,236.6163,76.073
5000,0.5061,0.478216,0.65245,236.7271,76.037
6000,0.4962,0.486739,0.632091,236.6911,76.048
7000,0.4826,0.467688,0.638218,236.7373,76.034
8000,0.481,0.470502,0.645665,236.7323,76.035
9000,0.4709,0.488926,0.650672,236.7457,76.031
10000,0.4633,0.461195,0.664419,236.5453,76.095


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+SentMix
{'eval_loss': 28.166885375976562, 'eval_accuracy': 0.9234210526315789, 'eval_f1': 0.9232460652517472, 'eval_precision': 0.9234335183887521, 'eval_recall': 0.9234210526315789, 'eval_runtime': 99.6893, 'eval_samples_per_second': 76.237, 'epoch': 0.21, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+SentMix', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+SentMix
{'eval_loss': 31.715604782104492, 'eval_accuracy': 0.8610021786492374, 'eval_f1': 0.6217180999731503, 'eval_precision': 0.6560021103465739, 'eval_recall': 0.599704937595565, 'eval_runtime': 90.6367, 'eval_samples_per_second': 75.963, 'epoch': 0.21, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+SentMix'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,0.9518,0.858275,0.356838,315.3392,76.109
2000,0.667,0.502162,0.509839,314.8148,76.235
3000,0.4702,0.444456,0.518961,314.7548,76.25
4000,0.4321,0.423378,0.534421,314.6073,76.286
5000,0.412,0.416031,0.536883,314.898,76.215
6000,0.4133,0.401901,0.537332,314.837,76.23
7000,0.4009,0.430192,0.528145,314.6581,76.273
8000,0.4226,0.401363,0.534487,314.4254,76.33
9000,0.411,0.40135,0.545044,314.5071,76.31
10000,0.3932,0.416524,0.549767,314.5447,76.301


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+WordMix
{'eval_loss': 29.58136749267578, 'eval_accuracy': 0.9196052631578947, 'eval_f1': 0.9197204941804656, 'eval_precision': 0.9207344909193765, 'eval_recall': 0.9196052631578947, 'eval_runtime': 99.6425, 'eval_samples_per_second': 76.273, 'epoch': 0.2, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+WordMix', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+WordMix
{'eval_loss': 34.08938217163086, 'eval_accuracy': 0.8663761801016703, 'eval_f1': 0.6379973514983143, 'eval_precision': 0.6448958199824899, 'eval_recall': 0.6326280193756362, 'eval_runtime': 90.4663, 'eval_samples_per_second': 76.106, 'epoch': 0.2, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+WordMix'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.





Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1000,0.9374,0.432164,0.894737,0.894513,0.897194,0.894737,98.9396,76.815
2000,0.5037,0.603055,0.8925,0.892208,0.897569,0.8925,98.8155,76.911
3000,0.486,0.474691,0.904474,0.904231,0.904497,0.904474,98.7661,76.949
4000,0.4731,0.476541,0.909079,0.909006,0.910443,0.909079,98.7703,76.946
5000,0.4659,0.526399,0.900789,0.900731,0.902092,0.900789,98.7443,76.966
6000,0.4636,0.459006,0.907368,0.907355,0.907718,0.907368,98.7691,76.947
7000,0.4496,0.581536,0.882368,0.882437,0.891973,0.882368,98.7765,76.941
8000,0.5102,0.517888,0.905263,0.905364,0.906708,0.905263,98.7339,76.975
9000,0.5145,0.548614,0.894737,0.894357,0.898308,0.894737,98.7479,76.964
10000,0.5524,0.574086,0.890132,0.888993,0.893776,0.890132,98.6796,77.017


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


ORIG for pretrained/roberta-base-ag_news-ORIG+ORIG
{'eval_loss': 0.47654083371162415, 'eval_accuracy': 0.9090789473684211, 'eval_f1': 0.9090061024100096, 'eval_precision': 0.910443011846536, 'eval_recall': 0.9090789473684211, 'eval_runtime': 98.412, 'eval_samples_per_second': 77.226, 'epoch': 0.37, 'run': 'pretrained/roberta-base-ag_news-ORIG+ORIG', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+ORIG
{'eval_loss': 0.6181355714797974, 'eval_accuracy': 0.8723311546840958, 'eval_f1': 0.6425970259273684, 'eval_precision': 0.6457203714922186, 'eval_recall': 0.6408438120509388, 'eval_runtime': 89.693, 'eval_samples_per_second': 76.762, 'epoch': 0.37, 'run': 'pretrained/roberta-base-ag_news-ORIG+ORIG'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.





Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1000,1.166,0.394815,0.875,0.874449,0.87548,0.875,99.031,76.744
2000,0.4845,0.561577,0.891579,0.890981,0.894082,0.891579,98.9086,76.839
3000,0.5409,0.391491,0.907632,0.90763,0.909636,0.907632,98.7925,76.929
4000,0.5493,0.512982,0.904868,0.903972,0.9081,0.904868,98.8077,76.917
5000,0.526,0.521181,0.904079,0.903531,0.904649,0.904079,98.8045,76.92
6000,0.4877,0.465242,0.909737,0.90912,0.909938,0.909737,98.7792,76.939
7000,0.5382,0.406012,0.912105,0.911902,0.913485,0.912105,98.7791,76.939
8000,0.5094,0.534182,0.908684,0.908239,0.908382,0.908684,98.8336,76.897
9000,0.5016,0.443947,0.909211,0.909157,0.911095,0.909211,98.798,76.925
10000,0.5356,0.527275,0.903553,0.903497,0.905428,0.903553,98.8102,76.915


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


ORIG for pretrained/roberta-base-ag_news-ORIG+INV
{'eval_loss': 0.4191950559616089, 'eval_accuracy': 0.9153947368421053, 'eval_f1': 0.9154108410249024, 'eval_precision': 0.9156046729186273, 'eval_recall': 0.9153947368421053, 'eval_runtime': 98.298, 'eval_samples_per_second': 77.316, 'epoch': 0.3, 'run': 'pretrained/roberta-base-ag_news-ORIG+INV', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+INV
{'eval_loss': 0.8295277953147888, 'eval_accuracy': 0.792156862745098, 'eval_f1': 0.6253903754257321, 'eval_precision': 0.66251156421205, 'eval_recall': 0.595616620037, 'eval_runtime': 89.6263, 'eval_samples_per_second': 76.819, 'epoch': 0.3, 'run': 'pretrained/roberta-base-ag_news-ORIG+INV'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.2197,0.65242,0.768045,157.3184,76.278
2000,0.6081,0.612829,0.790573,156.8334,76.514
3000,0.6244,0.565237,0.792021,156.767,76.547
4000,0.5893,0.605146,0.764189,156.7385,76.561
5000,0.5959,0.673987,0.766294,156.7841,76.538
6000,0.5718,0.591082,0.787536,156.7704,76.545
7000,0.604,0.569322,0.811584,156.7134,76.573
8000,0.5547,0.624532,0.765035,156.7768,76.542
9000,0.5858,0.589353,0.778963,156.7163,76.571
10000,0.5916,0.681393,0.770979,156.7735,76.544


ORIG for pretrained/roberta-base-ag_news-ORIG+SIB
{'eval_loss': 28.4146785736084, 'eval_accuracy': 0.9139473684210526, 'eval_f1': 0.9138727969768815, 'eval_precision': 0.9147344530067089, 'eval_recall': 0.9139473684210526, 'eval_runtime': 97.9732, 'eval_samples_per_second': 77.572, 'epoch': 0.22, 'run': 'pretrained/roberta-base-ag_news-ORIG+SIB', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+SIB
{'eval_loss': 35.062110900878906, 'eval_accuracy': 0.8621641249092229, 'eval_f1': 0.6336866606603448, 'eval_precision': 0.6727786799117397, 'eval_recall': 0.6056947042705287, 'eval_runtime': 89.299, 'eval_samples_per_second': 77.101, 'epoch': 0.22, 'run': 'pretrained/roberta-base-ag_news-ORIG+SIB'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1949,0.560067,0.810959,155.9885,76.929
2000,0.5459,0.547729,0.844907,155.7472,77.048
3000,0.5582,0.587455,0.833147,155.9118,76.967
4000,0.5346,0.630189,0.828062,156.0511,76.898
5000,0.5333,0.594924,0.846772,156.0155,76.915
6000,0.5245,0.52113,0.833656,156.0157,76.915
7000,0.5388,0.56491,0.829595,155.9224,76.961
8000,0.4813,0.603293,0.830018,155.9227,76.961
9000,0.5295,0.453696,0.8411,155.9521,76.947
10000,0.5444,0.56636,0.840132,155.9642,76.941


ORIG for pretrained/roberta-base-ag_news-ORIG+INVSIB
{'eval_loss': 28.758499145507812, 'eval_accuracy': 0.9167105263157894, 'eval_f1': 0.9166299656987345, 'eval_precision': 0.916595346491176, 'eval_recall': 0.9167105263157895, 'eval_runtime': 97.9767, 'eval_samples_per_second': 77.569, 'epoch': 0.33, 'run': 'pretrained/roberta-base-ag_news-ORIG+INVSIB', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+INVSIB
{'eval_loss': 34.00143051147461, 'eval_accuracy': 0.8217864923747277, 'eval_f1': 0.6221824558656399, 'eval_precision': 0.6439190515643213, 'eval_recall': 0.6080798774693251, 'eval_runtime': 89.3043, 'eval_samples_per_second': 77.096, 'epoch': 0.33, 'run': 'pretrained/roberta-base-ag_news-ORIG+INVSIB'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1693,0.588143,0.795455,155.7524,77.045
2000,0.5869,0.585704,0.804957,155.5115,77.165
3000,0.6012,0.557359,0.779747,155.5056,77.168
4000,0.5856,0.562697,0.786823,155.5568,77.142
5000,0.5958,0.591833,0.79942,155.6167,77.113
6000,0.5659,0.600729,0.779856,155.6012,77.12
7000,0.604,0.593605,0.794404,155.6085,77.117
8000,0.5554,0.614918,0.795494,155.6039,77.119
9000,0.5838,0.621258,0.782794,155.6161,77.113
10000,0.5572,0.634508,0.785453,155.5048,77.168


ORIG for pretrained/roberta-base-ag_news-ORIG+TextMix
{'eval_loss': 24.32720375061035, 'eval_accuracy': 0.9046052631578947, 'eval_f1': 0.904234236570405, 'eval_precision': 0.9041800493775376, 'eval_recall': 0.9046052631578947, 'eval_runtime': 98.0354, 'eval_samples_per_second': 77.523, 'epoch': 0.16, 'run': 'pretrained/roberta-base-ag_news-ORIG+TextMix', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+TextMix
{'eval_loss': 29.201152801513672, 'eval_accuracy': 0.8986201888162673, 'eval_f1': 0.6643465973185576, 'eval_precision': 0.6698015285063132, 'eval_recall': 0.6592862927257201, 'eval_runtime': 89.3629, 'eval_samples_per_second': 77.045, 'epoch': 0.16, 'run': 'pretrained/roberta-base-ag_news-ORIG+TextMix'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1377,0.766543,0.596303,234.3709,76.801
2000,0.5686,0.496281,0.659713,234.0096,76.92
3000,0.5266,0.519208,0.652801,234.5166,76.754
4000,0.5096,0.492097,0.662016,234.9206,76.622
5000,0.5166,0.498778,0.656389,234.9707,76.605
6000,0.4955,0.495899,0.631686,235.1178,76.557
7000,0.5077,0.45751,0.658363,235.3834,76.471
8000,0.4797,0.466726,0.675621,234.9672,76.606
9000,0.5022,0.537422,0.66296,234.5018,76.758
10000,0.4827,0.475589,0.659696,234.0948,76.892


ORIG for pretrained/roberta-base-ag_news-ORIG+SentMix
{'eval_loss': 28.718870162963867, 'eval_accuracy': 0.9265789473684211, 'eval_f1': 0.9264409099390788, 'eval_precision': 0.9280269124277573, 'eval_recall': 0.926578947368421, 'eval_runtime': 98.0561, 'eval_samples_per_second': 77.507, 'epoch': 0.25, 'run': 'pretrained/roberta-base-ag_news-ORIG+SentMix', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+SentMix
{'eval_loss': 34.066078186035156, 'eval_accuracy': 0.8335511982570806, 'eval_f1': 0.5908967108776653, 'eval_precision': 0.6685967602921301, 'eval_recall': 0.5524350655788238, 'eval_runtime': 89.4286, 'eval_samples_per_second': 76.989, 'epoch': 0.25, 'run': 'pretrained/roberta-base-ag_news-ORIG+SentMix'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,0.9458,0.834376,0.380255,311.9894,76.926
2000,0.5278,0.441671,0.534052,311.506,77.045
3000,0.4336,0.4405,0.533294,311.2206,77.116
4000,0.4234,0.423524,0.543366,311.0111,77.168
5000,0.4115,0.437813,0.531892,310.881,77.2
6000,0.4191,0.427785,0.533463,310.8519,77.207
7000,0.4238,0.434439,0.536819,311.3974,77.072
8000,0.4442,0.419204,0.524049,311.3964,77.072
9000,0.422,0.430208,0.530205,311.4153,77.068
10000,0.4049,0.439601,0.550988,311.3837,77.075


ORIG for pretrained/roberta-base-ag_news-ORIG+WordMix
{'eval_loss': 29.938947677612305, 'eval_accuracy': 0.9242105263157895, 'eval_f1': 0.924116800973805, 'eval_precision': 0.9241857466700161, 'eval_recall': 0.9242105263157895, 'eval_runtime': 98.0989, 'eval_samples_per_second': 77.473, 'epoch': 0.2, 'run': 'pretrained/roberta-base-ag_news-ORIG+WordMix', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+WordMix
{'eval_loss': 35.76468276977539, 'eval_accuracy': 0.831517792302106, 'eval_f1': 0.6124722567486625, 'eval_precision': 0.6178121955233269, 'eval_recall': 0.6137361046471246, 'eval_runtime': 89.4029, 'eval_samples_per_second': 77.011, 'epoch': 0.2, 'run': 'pretrained/roberta-base-ag_news-ORIG+WordMix'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Loading cached split indices for dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-e574b06a521f7d4b.arrow and C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-00ad71102627cc55.arrow
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the che

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-6943bef4b721d7aa.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.





Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1000,0.8931,0.431875,0.882368,0.882211,0.887275,0.882368,237.695,31.974
2000,0.5268,0.504996,0.905921,0.905729,0.906243,0.905921,237.8805,31.949
3000,0.5106,0.478151,0.91,0.909644,0.91284,0.91,237.7479,31.967
4000,0.4918,0.465701,0.910395,0.910327,0.911163,0.910395,237.8562,31.952
5000,0.4906,0.471499,0.913026,0.91285,0.913047,0.913026,238.3336,31.888
6000,0.4952,0.524322,0.895395,0.894724,0.899958,0.895395,237.657,31.979
7000,0.4648,0.59623,0.884868,0.884514,0.893429,0.884868,238.0202,31.93
8000,0.549,0.455152,0.905658,0.905615,0.908236,0.905658,237.9922,31.934
9000,0.4701,0.46937,0.911447,0.910767,0.913161,0.911447,237.7978,31.96
10000,0.5335,0.569938,0.896579,0.896157,0.901764,0.896579,237.7712,31.964


  _warn_prf(average, modifier, msg_start, len(result))


ORIG for pretrained/xlnet-base-cased-ag_news-ORIG+ORIG
{'eval_loss': 0.47149935364723206, 'eval_accuracy': 0.9130263157894737, 'eval_f1': 0.9128498614830755, 'eval_precision': 0.9130469836991691, 'eval_recall': 0.9130263157894737, 'eval_runtime': 237.9381, 'eval_samples_per_second': 31.941, 'epoch': 0.39, 'run': 'pretrained/xlnet-base-cased-ag_news-ORIG+ORIG', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/xlnet-base-cased-ag_news-ORIG+ORIG
{'eval_loss': 0.8703216910362244, 'eval_accuracy': 0.8496732026143791, 'eval_f1': 0.6538763069868, 'eval_precision': 0.6731572192682987, 'eval_recall': 0.6364644046662674, 'eval_runtime': 201.756, 'eval_samples_per_second': 34.125, 'epoch': 0.39, 'run': 'pretrained/xlnet-base-cased-ag_news-ORIG+ORIG'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model fr

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-6943bef4b721d7aa.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.





Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
1000,1.1574,0.443578,0.861316,0.860555,0.861548,0.861316,236.2497,32.169
2000,0.453,0.543957,0.896974,0.896597,0.896916,0.896974,236.3371,32.157
3000,0.5918,0.479478,0.898947,0.899018,0.903339,0.898947,236.2366,32.171
4000,0.5804,0.57685,0.896184,0.894642,0.899134,0.896184,236.2481,32.17
5000,0.5886,0.524765,0.908026,0.907709,0.908437,0.908026,236.2712,32.166
6000,0.5434,0.457579,0.912763,0.912378,0.912797,0.912763,236.2652,32.167
7000,0.56,0.54755,0.902763,0.902602,0.910045,0.902763,236.2161,32.174
8000,0.5113,0.555692,0.908816,0.908492,0.908916,0.908816,236.2788,32.165
9000,0.5251,0.490537,0.906842,0.906092,0.909691,0.906842,236.2617,32.168
10000,0.5273,0.483726,0.914868,0.914874,0.915743,0.914868,236.28,32.165


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


ORIG for pretrained/xlnet-base-cased-ag_news-ORIG+INV
{'eval_loss': 0.4209737479686737, 'eval_accuracy': 0.9157894736842105, 'eval_f1': 0.9156375582128906, 'eval_precision': 0.915605058138296, 'eval_recall': 0.9157894736842105, 'eval_runtime': 237.3088, 'eval_samples_per_second': 32.026, 'epoch': 0.37, 'run': 'pretrained/xlnet-base-cased-ag_news-ORIG+INV', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/xlnet-base-cased-ag_news-ORIG+INV
{'eval_loss': 0.6510989665985107, 'eval_accuracy': 0.8264342774146696, 'eval_f1': 0.604001301328089, 'eval_precision': 0.6114328875586422, 'eval_recall': 0.6068399284220588, 'eval_runtime': 201.1389, 'eval_samples_per_second': 34.23, 'epoch': 0.37, 'run': 'pretrained/xlnet-base-cased-ag_news-ORIG+INV'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model fr

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-6943bef4b721d7aa.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1752,0.721647,0.715345,374.9463,32.005
2000,0.6399,0.596559,0.769013,374.9381,32.005
3000,0.6215,0.578555,0.772692,375.0405,31.997
4000,0.6043,0.597769,0.765442,374.8402,32.014
5000,0.6068,0.605279,0.766563,375.2214,31.981
6000,0.5782,0.587795,0.777467,375.1037,31.991
7000,0.6051,0.685981,0.754559,374.951,32.004
8000,0.5675,0.638225,0.786434,375.2692,31.977
9000,0.5818,0.587492,0.778258,375.408,31.965


RuntimeError: CUDA error: unknown error

In [7]:
df = pd.DataFrame(results)
df

Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,epoch,run,test
0,0.403315,0.909737,0.909406,0.909337,0.909737,99.7262,76.209,0.42,pretrained/bert-base-uncased-ag_news-ORIG+ORIG,20NG
1,0.738579,0.837037,0.628058,0.629622,0.632722,90.5726,76.016,0.42,pretrained/bert-base-uncased-ag_news-ORIG+ORIG,
2,0.388141,0.919605,0.919407,0.919906,0.919605,99.5063,76.377,0.28,pretrained/bert-base-uncased-ag_news-ORIG+INV,20NG
3,0.626499,0.846042,0.647005,0.664907,0.631381,90.4282,76.138,0.28,pretrained/bert-base-uncased-ag_news-ORIG+INV,
4,28.322521,0.916579,0.916382,0.916738,0.916579,99.4292,76.436,0.32,pretrained/bert-base-uncased-ag_news-ORIG+SIB,20NG
5,30.492338,0.837763,0.635321,0.64999,0.629993,90.3602,76.195,0.32,pretrained/bert-base-uncased-ag_news-ORIG+SIB,
6,29.862549,0.912368,0.912109,0.912452,0.912368,99.7859,76.163,0.28,pretrained/bert-base-uncased-ag_news-ORIG+INVSIB,20NG
7,32.260082,0.843718,0.634814,0.640585,0.633381,90.6306,75.968,0.28,pretrained/bert-base-uncased-ag_news-ORIG+INVSIB,
8,30.897173,0.922237,0.922229,0.922574,0.922237,99.1179,76.676,0.32,pretrained/bert-base-uncased-ag_news-ORIG+TextMix,20NG
9,35.254307,0.882789,0.65278,0.674804,0.633557,89.9611,76.533,0.32,pretrained/bert-base-uncased-ag_news-ORIG+TextMix,
