In [1]:
from utils import *

from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback
from datasets import load_dataset, concatenate_datasets, Dataset
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

In [2]:
# import wandb

# wandb.login()

# %env WANDB_PROJECT = train_SST2
# %env WANDB_WATCH = 'all'
# %env WANDB_SILENT = true

In [3]:
def one_hot_encode(y, nb_classes=4):
    if not isinstance(y, np.ndarray):
        y = np.expand_dims(np.array(y), 0)
    res = np.eye(nb_classes)[np.array(y).reshape(-1)]
    return res.reshape(list(y.shape)+[nb_classes])[0]

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=250)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=None)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1.mean(),
        'precision': precision.mean(),
        'recall': recall.mean()
    }

def acc_at_k(y_true, y_pred, k=2):
    y_true = torch.tensor(y_true) if type(y_true) != torch.Tensor else y_true
    y_pred = torch.tensor(y_pred) if type(y_pred) != torch.Tensor else y_pred
    total = len(y_true)
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    out_weights, out_idx = torch.topk(y_pred, k=k, dim=-1)
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) Loss
    :param logits: (batch, *)
    :param target: (batch, *) same shape as logits, each item must be a valid distribution: target[i, :].sum() == 1.
    """
    logprobs = torch.nn.functional.log_softmax(logits.view(logits.shape[0], -1), dim=1)
    batchloss = - torch.sum(target.view(target.shape[0], -1) * logprobs, dim=1)
    if reduction == 'none':
        return batchloss
    elif reduction == 'mean':
        return torch.mean(batchloss)
    elif reduction == 'sum':
        return torch.sum(batchloss)
    else:
        raise NotImplementedError('Unsupported reduction mode.')

def compute_metrics_w_soft_target(pred):
    labels = pred.label_ids
    preds = pred.predictions
    acc = acc_at_k(labels, preds, k=2)
    return {
        'accuracy': acc,
    }

class Trainer_w_soft_target(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        loss = CEwST_loss(logits, labels)
        if return_outputs:
            return loss, outputs
        return loss
    
class DefaultCollator:
    def __init__(self):
        pass
    def __call__(self, batch):
        return torch.utils.data.dataloader.default_collate(batch)

In [4]:
from sklearn.datasets import fetch_20newsgroups

def get_20NG_test_dataset():
    cats = [
        'talk.politics.mideast',                                # Wolrd 0
        'rec.sport.hockey', 'rec.sport.baseball',               # Sports 1
        # 'misc.forsale',                                       # Business 2
        'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space', # Sci/Tech 3
    ]

    dataset = fetch_20newsgroups(
        subset='all',
        categories=cats,
        remove=('headers', 'footers', 'quotes')
    )

    df = pd.DataFrame([dataset.data, dataset.target]).T
    df.rename(columns={0:'text', 1: 'label'}, inplace=True)

    mapper = {
        0: 1,
        1: 1,
        2: 3,
        3: 3,
        4: 3,
        5: 3,
        6: 0,
    }

    df.label = df.label.map(mapper)
    df.text = df.text.replace('\n', ' ', regex=True).str.strip()

    test_dataset = Dataset.from_pandas(df)
    
    return test_dataset

In [None]:
# ['ORIG', INV', 'SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']
# ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']

In [5]:
MODEL_NAMES = ['roberta-base', 'xlnet-base-cased']

In [6]:
use_pretrain = False

for MODEL_NAME in MODEL_NAMES:
    for t in ['INV']: 
        
        # run = wandb.init(project="train_SST2-"+t, reinit=True)
                
        soft_target = False
        eval_only = False
        
        checkpoint = 'pretrained/' + MODEL_NAME + "-ag_news-ORIG+" + t 
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        
        if t == 'ORIG':
            train_dataset = load_dataset('ag_news', split='train')
        else:
            # load custom data    
            text = npy_load("./assets/AG_NEWS/" + t + "/text.npy")
            label = npy_load("./assets/AG_NEWS/" + t + "/label.npy")
            if len(label.shape) > 1:
                df = pd.DataFrame({'text': text, 'label': label.tolist()})
                df.text = df.text.astype(str)
                df.label = df.label.map(lambda y: np.array(y))
            else:
                df = pd.DataFrame({'text': text, 'label': label})
                df.text = df.text.astype(str)
                df.label = df.label.astype(object)
            train_dataset = Dataset.from_pandas(df)  
            
            # load orig data
            orig_dataset = load_dataset('ag_news', split='train')
            df = orig_dataset.to_pandas()
            df = df[df.columns[::-1]]
            df.text = df.text.astype(str)
            if len(label.shape) > 1:
                df.label = df.label.map(one_hot_encode)
            else:
                df.label = df.label.astype(object)
            orig_dataset = Dataset.from_pandas(df)
            
            # merge orig + custom data
            train_dataset = concatenate_datasets([orig_dataset, train_dataset])
            train_dataset.shuffle()
            
        if use_pretrain and os.path.exists(checkpoint):
            print('loading {}...'.format(checkpoint))
            MODEL_NAME = checkpoint
            eval_only = True
                
        dataset_dict = train_dataset.train_test_split(
            test_size = 0.05,
            train_size = 0.95,
            shuffle = True
        )
        train_dataset = dataset_dict['train']
        eval_dataset = dataset_dict['test']
        test_dataset = load_dataset('ag_news')['test']
        test_dataset_20NG = get_20NG_test_dataset()
        
        # # reduce training time
        # n = 10000
        # train_dataset = Dataset.from_dict(train_dataset[:n])
        # eval_dataset = Dataset.from_dict(eval_dataset[:n])
        # test_dataset = Dataset.from_dict(test_dataset[:n])

        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=4).to(device)
                
        train_dataset = train_dataset.map(tokenize, batched=True, batch_size=len(train_dataset))
        eval_dataset = eval_dataset.map(tokenize, batched=True, batch_size=len(eval_dataset))
        test_dataset = test_dataset.map(tokenize, batched=True, batch_size=len(test_dataset))
        test_dataset_20NG = test_dataset_20NG.map(tokenize, batched=True, batch_size=len(test_dataset_20NG))
        train_dataset.rename_column_('label', 'labels')
        eval_dataset.rename_column_('label', 'labels')
        test_dataset.rename_column_('label', 'labels')
        test_dataset_20NG.rename_column_('label', 'labels')
        train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        eval_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset_20NG.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        
        if len(np.array(train_dataset['labels']).shape) > 1:
            soft_target = True

        train_batch_size = 3
        eval_batch_size = 32
        num_epoch = 3
        gradient_accumulation_steps=1
        max_steps = int((len(train_dataset) * num_epoch / gradient_accumulation_steps) / train_batch_size)
        
        training_args = TrainingArguments(
            seed=1,
            # adafactor=True,
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps, 
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=1000,
            logging_first_step=True,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            greater_is_better=True,
            evaluation_strategy="steps",
            run_name=checkpoint,
            label_names=['World', 'Sports', 'Business', 'Sci/Tech']
        )
        
        if soft_target:
            trainer = Trainer_w_soft_target(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics_w_soft_target,
                train_dataset=train_dataset,
                eval_dataset=eval_dataset,
                data_collator=DefaultCollator(),
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )
        else: 
            trainer = Trainer(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics,
                train_dataset=train_dataset,
                eval_dataset=test_dataset,
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )

        if not eval_only:
            trainer.train()
            
        trainer.compute_metrics = compute_metrics
        
        # test with ORIG data
        trainer.eval_dataset = test_dataset
        out_orig = trainer.evaluate()
        print('ORIG for {}\n{}'.format(checkpoint, out_orig))
        
        # test with 20NG data
        trainer.eval_dataset = test_dataset_20NG
        out_20NG = trainer.evaluate()
        print('20NG for {}\n{}'.format(checkpoint, out_20NG))
        
        # run.finish()

Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

rename_column_ is deprecated and will be removed in the next major version of datasets. Use the dataset.rename_column method instead.





W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
500,1.3918,1.279394,0.460526,0.415678,0.524844,0.460526,98.0531,77.509
1000,1.09,0.744924,0.851316,0.851317,0.855698,0.851316,98.8093,76.916
1500,0.57,0.365533,0.884737,0.884499,0.886943,0.884737,98.5294,77.134
2000,0.4762,0.394324,0.895,0.894711,0.895307,0.895,98.491,77.164
2500,0.4872,0.428221,0.901842,0.90177,0.902269,0.901842,98.469,77.182
3000,0.514,0.4574,0.899079,0.898208,0.902952,0.899079,98.5455,77.122
3500,0.4835,0.426663,0.908421,0.908615,0.910561,0.908421,98.6422,77.046
4000,0.5225,0.397863,0.908421,0.908513,0.910502,0.908421,98.6426,77.046
4500,0.4337,0.582612,0.889342,0.889155,0.893778,0.889342,98.658,77.034
5000,0.5184,0.435768,0.908947,0.90919,0.91163,0.908947,98.6065,77.074


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+INV
{'eval_loss': 0.3981076180934906, 'eval_accuracy': 0.9157894736842105, 'eval_f1': 0.9156453459413902, 'eval_precision': 0.9162553217551063, 'eval_recall': 0.9157894736842105, 'eval_runtime': 99.1891, 'eval_samples_per_second': 76.621, 'epoch': 0.16}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+INV
{'eval_loss': 0.7608842849731445, 'eval_accuracy': 0.8225127087872186, 'eval_f1': 0.6387646402833478, 'eval_precision': 0.6607739782946607, 'eval_recall': 0.6189966234759033, 'eval_runtime': 89.9924, 'eval_samples_per_second': 76.506, 'epoch': 0.16}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.4014,1.339915,0.373342,313.5371,76.546
1000,1.1861,0.957919,0.693596,312.8604,76.712
1500,0.7538,0.62599,0.776996,312.8432,76.716
2000,0.6614,0.576724,0.780144,313.373,76.586
2500,0.5843,0.583685,0.769466,313.5155,76.551
3000,0.5337,0.569334,0.789183,313.6461,76.519
3500,0.6033,0.617323,0.76827,313.7001,76.506
4000,0.5772,0.586748,0.781876,313.6802,76.511
4500,0.559,0.584709,0.777206,313.6499,76.518
5000,0.6033,0.56835,0.759837,313.6408,76.521


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+SIB
{'eval_loss': 24.4759578704834, 'eval_accuracy': 0.9089473684210526, 'eval_f1': 0.9086363385138754, 'eval_precision': 0.9091866368529096, 'eval_recall': 0.9089473684210526, 'eval_runtime': 99.5726, 'eval_samples_per_second': 76.326, 'epoch': 0.11}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+SIB
{'eval_loss': 29.666675567626953, 'eval_accuracy': 0.9003631082062454, 'eval_f1': 0.6651768987063545, 'eval_precision': 0.6838086494709936, 'eval_recall': 0.6498429853678157, 'eval_runtime': 90.4223, 'eval_samples_per_second': 76.143, 'epoch': 0.11}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.3649,1.291298,0.452991,314.6647,76.272
1000,1.0584,0.760444,0.788323,314.0906,76.411
1500,0.6409,0.532847,0.816937,314.1072,76.407
2000,0.529,0.508033,0.833269,314.0715,76.416
2500,0.5441,0.545907,0.835639,314.0515,76.421
3000,0.5024,0.496954,0.830333,314.2084,76.382
3500,0.5438,0.560293,0.826574,314.0878,76.412
4000,0.5255,0.544773,0.826733,314.2234,76.379
4500,0.5043,0.532814,0.83537,314.05,76.421
5000,0.5297,0.522391,0.832855,314.0456,76.422


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+INVSIB
{'eval_loss': 30.694591522216797, 'eval_accuracy': 0.9184210526315789, 'eval_f1': 0.9183232318745234, 'eval_precision': 0.9203697597715643, 'eval_recall': 0.918421052631579, 'eval_runtime': 99.1659, 'eval_samples_per_second': 76.639, 'epoch': 0.25}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+INVSIB
{'eval_loss': 35.89020538330078, 'eval_accuracy': 0.8530137981118373, 'eval_f1': 0.6478650292411576, 'eval_precision': 0.6847052801624964, 'eval_recall': 0.6173132652899586, 'eval_runtime': 90.0866, 'eval_samples_per_second': 76.426, 'epoch': 0.25}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.3676,1.199957,0.468375,314.3375,76.351
1000,1.0187,0.833962,0.711344,313.9753,76.439
1500,0.7156,0.622826,0.767936,313.9755,76.439
2000,0.6348,0.562511,0.788578,313.8542,76.469
2500,0.5947,0.537453,0.790736,313.8623,76.467
3000,0.5273,0.572446,0.769876,313.8838,76.461
3500,0.5987,0.563389,0.775516,313.7298,76.499
4000,0.5817,0.563921,0.781201,313.7756,76.488
4500,0.5412,0.587844,0.790214,313.9084,76.455
5000,0.5437,0.584665,0.79108,313.9139,76.454


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+TextMix
{'eval_loss': 27.844295501708984, 'eval_accuracy': 0.9173684210526316, 'eval_f1': 0.9173654941024086, 'eval_precision': 0.9178486123968598, 'eval_recall': 0.9173684210526316, 'eval_runtime': 99.2833, 'eval_samples_per_second': 76.549, 'epoch': 0.17}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+TextMix
{'eval_loss': 29.816225051879883, 'eval_accuracy': 0.8753812636165578, 'eval_f1': 0.6571557615048226, 'eval_precision': 0.6771921957433351, 'eval_recall': 0.6397893704717308, 'eval_runtime': 90.1324, 'eval_samples_per_second': 76.388, 'epoch': 0.17}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.1511,1.114268,0.323402,470.1233,76.576
1000,1.0336,0.884952,0.521566,469.438,76.687
1500,0.7631,0.649266,0.602489,18897.1544,1.905
2000,0.5862,0.572656,0.618744,88473.8522,0.407
2500,0.5685,0.512929,0.647215,468.6654,76.814
3000,0.5282,0.501517,0.642441,469.3269,76.706
3500,0.5222,0.505045,0.631754,469.5193,76.674
4000,0.4813,0.494097,0.65136,469.5776,76.665
4500,0.5004,0.506767,0.651022,469.6186,76.658
5000,0.4969,0.496959,0.651673,469.6544,76.652


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+SentMix
{'eval_loss': 30.52907943725586, 'eval_accuracy': 0.9234210526315789, 'eval_f1': 0.9235062438613701, 'eval_precision': 0.9243024338768785, 'eval_recall': 0.9234210526315789, 'eval_runtime': 98.7901, 'eval_samples_per_second': 76.931, 'epoch': 0.19}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+SentMix
{'eval_loss': 31.589269638061523, 'eval_accuracy': 0.8697167755991285, 'eval_f1': 0.6420058179069323, 'eval_precision': 0.6875337737021379, 'eval_recall': 0.6091545591811411, 'eval_runtime': 89.6363, 'eval_samples_per_second': 76.81, 'epoch': 0.19}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,0.9903,0.94498,0.233069,628.0904,76.422
1000,0.9265,0.807549,0.400352,627.8476,76.452
1500,0.7372,0.615274,0.47442,627.8554,76.451
2000,0.5775,0.503713,0.509235,628.4586,76.377
2500,0.4989,0.460997,0.521279,628.9167,76.322
3000,0.4496,0.44889,0.521158,629.0286,76.308
3500,0.4617,0.447766,0.524062,629.417,76.261
4000,0.4477,0.438179,0.512496,629.7925,76.216
4500,0.4334,0.418281,0.52787,629.7813,76.217
5000,0.4155,0.424507,0.519998,629.8357,76.21


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+WordMix
{'eval_loss': 29.310897827148438, 'eval_accuracy': 0.9189473684210526, 'eval_f1': 0.9187454210176963, 'eval_precision': 0.9192500556299591, 'eval_recall': 0.9189473684210525, 'eval_runtime': 99.5289, 'eval_samples_per_second': 76.36, 'epoch': 0.11}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+WordMix
{'eval_loss': 33.30519104003906, 'eval_accuracy': 0.8854030501089325, 'eval_f1': 0.6563653404860761, 'eval_precision': 0.6732143055544277, 'eval_recall': 0.6416973260155483, 'eval_runtime': 90.4387, 'eval_samples_per_second': 76.129, 'epoch': 0.11}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,8.5316,8.248725,0.4315,312.2927,76.851
1000,8.4472,8.248702,0.007625,312.0548,76.91
1500,8.6663,8.24871,0.00075,311.8809,76.952
2000,8.3435,8.248634,0.000125,311.953,76.935
2500,8.1615,8.248737,1.755,312.0293,76.916
3000,8.5533,8.248922,0.048875,311.8622,76.957
3500,8.3274,8.24866,0.0,311.8373,76.963
4000,8.4387,8.248704,0.03975,311.9287,76.941
4500,8.2423,8.248625,1.605625,311.7783,76.978
5000,8.2495,8.249076,2.996375,311.8759,76.954


ORIG for pretrained/roberta-base-ag_news-ORIG+INV
{'eval_loss': 8.318387985229492, 'eval_accuracy': 0.24986842105263157, 'eval_f1': 0.100031605562579, 'eval_precision': 0.06253292939936776, 'eval_recall': 0.24986842105263157, 'eval_runtime': 98.7048, 'eval_samples_per_second': 76.997, 'epoch': 0.14}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+INV
{'eval_loss': 11.154783248901367, 'eval_accuracy': 0.00014524328249818446, 'eval_f1': 0.000250501002004008, 'eval_precision': 0.08333333333333333, 'eval_recall': 0.0001254390366281987, 'eval_runtime': 89.9665, 'eval_samples_per_second': 76.529, 'epoch': 0.14}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.3891,1.377349,0.244517,312.2836,76.853
1000,1.0254,0.634774,0.771021,311.6798,77.002
1500,0.5981,0.640084,0.78571,311.493,77.048
2000,0.6511,0.568483,0.787408,311.4885,77.049
2500,0.5817,0.626662,0.789294,311.5097,77.044
3000,0.5474,0.592289,0.790893,311.6116,77.019
3500,0.6387,0.61253,0.774661,311.8069,76.971
4000,0.5797,0.588685,0.796113,311.2974,77.097
4500,0.5716,0.592107,0.798151,310.8374,77.211
5000,0.6204,0.597476,0.740854,310.6942,77.246


ORIG for pretrained/roberta-base-ag_news-ORIG+SIB
{'eval_loss': 27.992504119873047, 'eval_accuracy': 0.9146052631578947, 'eval_f1': 0.9146066311486704, 'eval_precision': 0.9151864499847272, 'eval_recall': 0.9146052631578947, 'eval_runtime': 98.1858, 'eval_samples_per_second': 77.404, 'epoch': 0.24}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+SIB
{'eval_loss': 33.876922607421875, 'eval_accuracy': 0.8152505446623094, 'eval_f1': 0.6161710661872044, 'eval_precision': 0.6353659433310554, 'eval_recall': 0.6082873968734226, 'eval_runtime': 89.5341, 'eval_samples_per_second': 76.898, 'epoch': 0.24}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.3822,1.371978,0.256862,312.6748,76.757
1000,0.9886,0.558032,0.810924,312.4439,76.814
1500,0.5401,0.542028,0.836299,312.414,76.821
2000,0.5583,0.538455,0.836084,312.5356,76.791
2500,0.5473,0.54473,0.84362,312.4296,76.817
3000,0.5141,0.520598,0.838724,312.4072,76.823
3500,0.5584,0.585717,0.843131,312.5269,76.793
4000,0.5647,0.518247,0.830847,312.5423,76.79
4500,0.5449,0.521081,0.840007,312.6392,76.766
5000,0.5416,0.504393,0.843257,312.6764,76.757


ORIG for pretrained/roberta-base-ag_news-ORIG+INVSIB
{'eval_loss': 28.53048324584961, 'eval_accuracy': 0.9172368421052631, 'eval_f1': 0.9168902171309844, 'eval_precision': 0.9169935930710416, 'eval_recall': 0.9172368421052632, 'eval_runtime': 98.0926, 'eval_samples_per_second': 77.478, 'epoch': 0.24}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+INVSIB
{'eval_loss': 32.78082275390625, 'eval_accuracy': 0.8614379084967321, 'eval_f1': 0.6371545302917003, 'eval_precision': 0.6375424593791854, 'eval_recall': 0.637571651471424, 'eval_runtime': 89.4302, 'eval_samples_per_second': 76.987, 'epoch': 0.24}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.3847,1.373084,0.268592,313.1575,76.639
1000,0.999,0.611669,0.788161,312.6999,76.751
1500,0.5785,0.580798,0.792831,312.6704,76.758
2000,0.625,0.567294,0.80766,312.7833,76.73
2500,0.6117,0.55786,0.802769,312.6033,76.775
3000,0.5319,0.629352,0.769624,312.5506,76.788
3500,0.621,0.568968,0.811223,312.8133,76.723
4000,0.5944,0.585694,0.783193,312.8648,76.71
4500,0.5532,0.636635,0.775946,312.8243,76.72
5000,0.5825,0.593835,0.776256,312.8736,76.708


ORIG for pretrained/roberta-base-ag_news-ORIG+TextMix
{'eval_loss': 28.125120162963867, 'eval_accuracy': 0.9030263157894737, 'eval_f1': 0.9027590040639824, 'eval_precision': 0.9083255026965441, 'eval_recall': 0.9030263157894738, 'eval_runtime': 97.9335, 'eval_samples_per_second': 77.604, 'epoch': 0.12}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+TextMix
{'eval_loss': 35.096580505371094, 'eval_accuracy': 0.8883079157588961, 'eval_f1': 0.647538205679139, 'eval_precision': 0.6903325430220992, 'eval_recall': 0.6185810250868564, 'eval_runtime': 89.2875, 'eval_samples_per_second': 77.11, 'epoch': 0.12}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.156,1.155155,0.232862,467.0513,77.079
1000,0.9897,0.624172,0.609861,467.0265,77.083
1500,0.5626,0.535716,0.641618,467.0592,77.078
2000,0.5026,0.542142,0.65168,467.0557,77.079
2500,0.5492,0.508811,0.641255,467.4121,77.02
3000,0.4986,0.491901,0.657055,467.6199,76.986
3500,0.515,0.482801,0.663657,467.5558,76.996
4000,0.4903,0.48864,0.668669,467.7369,76.966
4500,0.4869,0.496708,0.658924,467.5965,76.989
5000,0.5012,0.513449,0.647862,467.8328,76.951


ORIG for pretrained/roberta-base-ag_news-ORIG+SentMix
{'eval_loss': 29.974964141845703, 'eval_accuracy': 0.9276315789473685, 'eval_f1': 0.9274385832425893, 'eval_precision': 0.9273289076908191, 'eval_recall': 0.9276315789473685, 'eval_runtime': 98.4892, 'eval_samples_per_second': 77.166, 'epoch': 0.13}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+SentMix
{'eval_loss': 35.96977233886719, 'eval_accuracy': 0.8592592592592593, 'eval_f1': 0.6423933170822469, 'eval_precision': 0.6444988862898565, 'eval_recall': 0.6471557154969605, 'eval_runtime': 89.7269, 'eval_samples_per_second': 76.733, 'epoch': 0.13}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-2682ce8e25ccad82.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,0.9729,0.962359,0.17175,622.0043,77.17
1000,0.9868,0.902585,0.401629,621.5097,77.231
1500,0.6384,0.469298,0.524624,621.8656,77.187
2000,0.4876,0.443331,0.527778,622.2259,77.142
2500,0.4462,0.43402,0.529342,622.3569,77.126
3000,0.4248,0.420592,0.543279,622.4057,77.12
3500,0.4417,0.439362,0.531302,622.5493,77.102
4000,0.4369,0.43582,0.523751,622.811,77.07
4500,0.435,0.428152,0.546004,622.8596,77.064
5000,0.4244,0.425502,0.539215,623.1927,77.023


ORIG for pretrained/roberta-base-ag_news-ORIG+WordMix
{'eval_loss': 24.538375854492188, 'eval_accuracy': 0.9267105263157894, 'eval_f1': 0.9267659838994593, 'eval_precision': 0.9271496483776024, 'eval_recall': 0.9267105263157895, 'eval_runtime': 98.432, 'eval_samples_per_second': 77.211, 'epoch': 0.13}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/roberta-base-ag_news-ORIG+WordMix
{'eval_loss': 28.26263999938965, 'eval_accuracy': 0.8326797385620915, 'eval_f1': 0.5882521925204612, 'eval_precision': 0.6174529757388829, 'eval_recall': 0.567786621256273, 'eval_runtime': 89.7437, 'eval_samples_per_second': 76.718, 'epoch': 0.13}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model fr

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,8.6714,8.285028,0.65625,751.1307,31.952
1000,8.49,8.253657,0.765375,750.8105,31.965
1500,8.6828,8.251933,0.809125,751.0295,31.956
2000,8.3524,8.250118,1.669375,751.3022,31.945
2500,8.1662,8.248649,0.168625,750.732,31.969
3000,8.5562,8.248621,1.8395,750.4139,31.982
3500,8.3294,8.249295,0.0,750.1993,31.991
4000,8.4392,8.248896,0.0,750.6866,31.971
4500,8.2425,8.24894,0.0,750.5331,31.977
5000,8.2494,8.248578,0.0,749.9663,32.001


ORIG for pretrained/xlnet-base-cased-ag_news-ORIG+INV
{'eval_loss': 8.318270683288574, 'eval_accuracy': 0.25, 'eval_f1': 0.1, 'eval_precision': 0.0625, 'eval_recall': 0.25, 'eval_runtime': 222.3891, 'eval_samples_per_second': 34.174, 'epoch': 0.18}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/xlnet-base-cased-ag_news-ORIG+INV
{'eval_loss': 11.154634475708008, 'eval_accuracy': 0.00014524328249818446, 'eval_f1': 0.00012635835228708617, 'eval_precision': 0.05, 'eval_recall': 6.325910931174089e-05, 'eval_runtime': 201.6115, 'eval_samples_per_second': 34.15, 'epoch': 0.18}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model fr

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-6943bef4b721d7aa.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.3824,1.174077,0.553556,746.4727,32.151
1000,0.9519,0.698672,0.732767,746.5082,32.15
1500,0.6539,0.639262,0.778344,746.4598,32.152
2000,0.6852,0.621415,0.773479,746.4741,32.151
2500,0.6079,0.630789,0.7732,746.6178,32.145
3000,0.5652,0.644635,0.767737,746.6763,32.142
3500,0.6422,0.590145,0.786911,746.5463,32.148
4000,0.5997,0.591975,0.784199,746.461,32.152
4500,0.5689,0.645393,0.769057,746.4881,32.151
5000,0.6301,0.606981,0.748131,746.6169,32.145


ORIG for pretrained/xlnet-base-cased-ag_news-ORIG+SIB
{'eval_loss': 31.604143142700195, 'eval_accuracy': 0.9025, 'eval_f1': 0.9018980761938504, 'eval_precision': 0.9055888909995566, 'eval_recall': 0.9025, 'eval_runtime': 222.161, 'eval_samples_per_second': 34.209, 'epoch': 0.12}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/xlnet-base-cased-ag_news-ORIG+SIB
{'eval_loss': 31.991535186767578, 'eval_accuracy': 0.9000726216412491, 'eval_f1': 0.6618761747173765, 'eval_precision': 0.6813051453656644, 'eval_recall': 0.6453741006942768, 'eval_runtime': 201.337, 'eval_samples_per_second': 34.196, 'epoch': 0.12}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model fr

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-6943bef4b721d7aa.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.3723,1.208815,0.55763,750.7494,31.968
1000,0.9241,0.57301,0.79776,750.245,31.99
1500,0.5719,0.515466,0.826374,749.5869,32.018
2000,0.5379,0.548727,0.834158,749.4644,32.023
2500,0.5661,0.578794,0.832421,749.841,32.007
3000,0.548,0.677821,0.813228,749.9742,32.001
3500,0.5874,0.576479,0.842924,749.8581,32.006
4000,0.5787,0.520947,0.834613,750.3491,31.985
4500,0.5317,0.540147,0.834642,749.9657,32.001
5000,0.5927,0.589095,0.830191,750.0485,31.998


ORIG for pretrained/xlnet-base-cased-ag_news-ORIG+INVSIB
{'eval_loss': 30.015947341918945, 'eval_accuracy': 0.9157894736842105, 'eval_f1': 0.915808306006684, 'eval_precision': 0.9163775179707737, 'eval_recall': 0.9157894736842105, 'eval_runtime': 222.4828, 'eval_samples_per_second': 34.16, 'epoch': 0.17}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/xlnet-base-cased-ag_news-ORIG+INVSIB
{'eval_loss': 34.32645797729492, 'eval_accuracy': 0.8175744371822803, 'eval_f1': 0.6200709479497988, 'eval_precision': 0.630872474245495, 'eval_recall': 0.6290870740005385, 'eval_runtime': 201.6809, 'eval_samples_per_second': 34.138, 'epoch': 0.17}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model fr

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-6943bef4b721d7aa.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.3628,1.190289,0.559373,749.2576,32.032
1000,0.9577,0.678102,0.729556,748.5994,32.06
1500,0.6452,0.612779,0.773538,748.9731,32.044
2000,0.6437,0.570695,0.789118,748.3781,32.069
2500,0.6234,0.57762,0.78019,748.4624,32.066
3000,0.5505,0.627197,0.766079,748.6157,32.059
3500,0.6392,0.636824,0.778618,748.4897,32.065
4000,0.6113,0.553559,0.794927,747.8421,32.092
4500,0.5711,0.630268,0.781509,748.4132,32.068
5000,0.6113,0.613513,0.770542,748.2901,32.073


ORIG for pretrained/xlnet-base-cased-ag_news-ORIG+TextMix
{'eval_loss': 27.249120712280273, 'eval_accuracy': 0.916578947368421, 'eval_f1': 0.9163047903687029, 'eval_precision': 0.91703357855582, 'eval_recall': 0.9165789473684212, 'eval_runtime': 222.3071, 'eval_samples_per_second': 34.187, 'epoch': 0.16}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/xlnet-base-cased-ag_news-ORIG+TextMix
{'eval_loss': 30.009477615356445, 'eval_accuracy': 0.8673928830791576, 'eval_f1': 0.6506783714026064, 'eval_precision': 0.6578139263566914, 'eval_recall': 0.6463484962788832, 'eval_runtime': 201.4438, 'eval_samples_per_second': 34.178, 'epoch': 0.16}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model fr

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-6943bef4b721d7aa.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.1554,1.073364,0.384284,1125.1704,31.995
1000,0.9623,0.721829,0.563439,1125.1186,31.997
1500,0.6527,0.572934,0.611174,1125.0837,31.998
2000,0.5774,0.569536,0.627017,1125.1221,31.997
2500,0.5874,0.517023,0.644191,1125.1455,31.996
3000,0.5349,0.503407,0.630524,1125.2445,31.993
3500,0.5251,0.490855,0.64165,1127.5727,31.927
4000,0.4881,0.505578,0.650297,1128.2604,31.908
4500,0.5085,0.490924,0.647578,1128.2784,31.907
5000,0.5204,0.501576,0.632909,1126.1024,31.969


ORIG for pretrained/xlnet-base-cased-ag_news-ORIG+SentMix
{'eval_loss': 30.555891036987305, 'eval_accuracy': 0.9271052631578948, 'eval_f1': 0.9270176507556998, 'eval_precision': 0.9269841536717776, 'eval_recall': 0.9271052631578948, 'eval_runtime': 222.2903, 'eval_samples_per_second': 34.19, 'epoch': 0.16}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/xlnet-base-cased-ag_news-ORIG+SentMix
{'eval_loss': 35.8184928894043, 'eval_accuracy': 0.8191721132897604, 'eval_f1': 0.584935403376821, 'eval_precision': 0.5995900430213603, 'eval_recall': 0.6008162850221705, 'eval_runtime': 201.4581, 'eval_samples_per_second': 34.176, 'epoch': 0.16}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model fr

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-6943bef4b721d7aa.arrow


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
500,1.0106,0.938854,0.248261,1495.6851,32.092
1000,0.9147,0.729477,0.425478,1496.0632,32.084
1500,0.6749,0.528664,0.480196,1496.2287,32.081
2000,0.5353,0.486066,0.500718,1495.0953,32.105
2500,0.4919,0.456192,0.514698,1494.7842,32.112
3000,0.4645,0.449363,0.526991,1495.1311,32.104
3500,0.47,0.455244,0.524048,1495.3432,32.1
4000,0.4655,0.441125,0.521441,1495.4049,32.098
4500,0.4509,0.435766,0.536386,1495.0243,32.107
5000,0.4267,0.446521,0.529858,1495.2555,32.102


ORIG for pretrained/xlnet-base-cased-ag_news-ORIG+WordMix
{'eval_loss': 32.73589324951172, 'eval_accuracy': 0.9146052631578947, 'eval_f1': 0.9142334848723681, 'eval_precision': 0.9140987901658261, 'eval_recall': 0.9146052631578947, 'eval_runtime': 222.1168, 'eval_samples_per_second': 34.216, 'epoch': 0.1}
20NG for pretrained/xlnet-base-cased-ag_news-ORIG+WordMix
{'eval_loss': 36.50894546508789, 'eval_accuracy': 0.8668119099491649, 'eval_f1': 0.6505277284454216, 'eval_precision': 0.6616983861911796, 'eval_recall': 0.6417526821089767, 'eval_runtime': 201.2307, 'eval_samples_per_second': 34.214, 'epoch': 0.1}


  _warn_prf(average, modifier, msg_start, len(result))
