In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback
from datasets import load_dataset, Dataset
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import os

from utils import *

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

In [2]:
def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=250)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=None)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1.mean(),
        'precision': precision.mean(),
        'recall': recall.mean()
    }

def acc_at_k(y_true, y_pred, k=2):
    y_true = torch.tensor(y_true) if type(y_true) != torch.Tensor else y_true
    y_pred = torch.tensor(y_pred) if type(y_pred) != torch.Tensor else y_pred
    total = len(y_true)
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    out_weights, out_idx = torch.topk(y_pred, k=k, dim=-1)
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) Loss
    :param logits: (batch, *)
    :param target: (batch, *) same shape as logits, each item must be a valid distribution: target[i, :].sum() == 1.
    """
    logprobs = torch.nn.functional.log_softmax(logits.view(logits.shape[0], -1), dim=1)
    batchloss = - torch.sum(target.view(target.shape[0], -1) * logprobs, dim=1)
    if reduction == 'none':
        return batchloss
    elif reduction == 'mean':
        return torch.mean(batchloss)
    elif reduction == 'sum':
        return torch.sum(batchloss)
    else:
        raise NotImplementedError('Unsupported reduction mode.')

def compute_metrics_w_soft_target(pred):
    labels = pred.label_ids
    preds = pred.predictions
    acc = acc_at_k(labels, preds, k=2)
    return {
        'accuracy': acc,
    }

class Trainer_w_soft_target(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        loss = CEwST_loss(logits, labels)
        if return_outputs:
            return loss, outputs
        return loss
    
class DefaultCollator:
    def __init__(self):
        pass
    def __call__(self, batch):
        return torch.utils.data.dataloader.default_collate(batch)

In [3]:
MODEL_NAMES = ['roberta-base', 'xlnet-base-cased'] # ['bert-base-uncased']

In [5]:
use_pretrain = True
soft_target = False

for t in ['ORIG', 'INV', 'SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']: 
    for MODEL_NAME in MODEL_NAMES:
        
        eval_only = False
        
        checkpoint = 'pretrained/' + MODEL_NAME + "-sst2-" + t 
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        
        if t == 'ORIG':
            train_dataset = load_dataset('glue', 'sst2', split='train[:90%]')
            train_dataset.rename_column_('sentence', 'text')
        else:
            # load custom data    
            text = npy_load("./assets/SST2/" + t + "/text.npy")
            label = npy_load("./assets/SST2/" + t + "/label.npy")
            if len(label.shape) > 1:
                df = pd.DataFrame({'text': text, 'label': label.tolist()})
                df.text = df.text.astype(str)
                df.label = df.label.map(lambda y: np.array(y))
            else:
                df = pd.DataFrame({'text': text, 'label': label})
                df.text = df.text.astype(str)
                df.label = df.label.astype(int)
            train_dataset = Dataset.from_pandas(df)  
            
        if use_pretrain and os.path.exists(checkpoint):
            print('loading {}...'.format(checkpoint))
            MODEL_NAME = checkpoint
            eval_only = True
            
        # split to get train
        dataset_dict = train_dataset.train_test_split(
            test_size = 0.1,
            train_size = 0.9,
            shuffle = True
        )
        train_dataset = dataset_dict['train']
        eval_dataset = dataset_dict['test']
        test_dataset = load_dataset('glue', 'sst2', split='train[-10%:]')
        test_dataset.rename_column_('sentence', 'text')
        
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
            
        train_dataset = train_dataset.map(tokenize, batched=True, batch_size=len(train_dataset))
        eval_dataset = eval_dataset.map(tokenize, batched=True, batch_size=len(eval_dataset))
        test_dataset = test_dataset.map(tokenize, batched=True, batch_size=len(test_dataset))
        train_dataset.rename_column_('label', 'labels')
        eval_dataset.rename_column_('label', 'labels')
        test_dataset.rename_column_('label', 'labels')
        train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        eval_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        
        if len(np.array(train_dataset['labels']).shape) > 1:
            soft_target = True
        
        train_batch_size = 8
        eval_batch_size = 32
        num_epoch = 10
        max_steps = int((len(train_dataset) * num_epoch) / train_batch_size)

        training_args = TrainingArguments(
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=int(max_steps / 10),
            load_best_model_at_end=True,
            metric_for_best_model="loss",
            greater_is_better=False,
            evaluation_strategy="steps"
        )

        if soft_target:
            trainer = Trainer_w_soft_target(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics_w_soft_target,
                train_dataset=train_dataset,
                eval_dataset=test_dataset,
                data_collator=DefaultCollator(),
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )
        else: 
            trainer = Trainer(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics,
                train_dataset=train_dataset,
                eval_dataset=test_dataset,
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )

        if not eval_only:
            trainer.train()
        
        trainer.compute_metrics = compute_metrics
            
        # test ORIG
        trainer.eval_dataset = test_dataset
        out = trainer.evaluate()
        print('ORIG for {}\n{}'.format(checkpoint, out))

Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


loading pretrained/roberta-base-sst2-ORIG...


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-064c92ac365a3073.arrow





  return torch.tensor(x, **format_kwargs)


ORIG for pretrained/roberta-base-sst2-ORIG
{'eval_loss': 0.37622275948524475, 'eval_accuracy': 0.8913140311804009, 'eval_f1': 0.8877689504886881, 'eval_precision': 0.8989870058042657, 'eval_recall': 0.8827442562478163, 'eval_runtime': 21.8586, 'eval_samples_per_second': 308.116}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Loading cached split indices for dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-bf5e183f00a0c14a.arrow and C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-3ef356105e3dfa64.arrow


loading pretrained/xlnet-base-cased-sst2-ORIG...


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-25ef0a763c470f18.arrow
Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-d8e5390c797a96a7.arrow
Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-6eefcfe7488fe508.arrow


ORIG for pretrained/xlnet-base-cased-sst2-ORIG
{'eval_loss': 0.39658862352371216, 'eval_accuracy': 0.8884929472902747, 'eval_f1': 0.8864007989789248, 'eval_precision': 0.8881341881466108, 'eval_recall': 0.8850543855717403, 'eval_runtime': 34.1683, 'eval_samples_per_second': 197.112}
loading pretrained/roberta-base-sst2-INV...


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-064c92ac365a3073.arrow





ORIG for pretrained/roberta-base-sst2-INV
{'eval_loss': 0.40975332260131836, 'eval_accuracy': 0.828656273199703, 'eval_f1': 0.8225940534396099, 'eval_precision': 0.8342121442918468, 'eval_recall': 0.8182732424347438, 'eval_runtime': 20.799, 'eval_samples_per_second': 323.813}
loading pretrained/xlnet-base-cased-sst2-INV...


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-6eefcfe7488fe508.arrow





ORIG for pretrained/xlnet-base-cased-sst2-INV
{'eval_loss': 0.3850291967391968, 'eval_accuracy': 0.8565701559020045, 'eval_f1': 0.855267309700608, 'eval_precision': 0.854077452089258, 'eval_recall': 0.8576819284458217, 'eval_runtime': 36.3688, 'eval_samples_per_second': 185.186}
loading pretrained/roberta-base-sst2-SIB...


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-064c92ac365a3073.arrow





ORIG for pretrained/roberta-base-sst2-SIB
{'eval_loss': 0.3838280737400055, 'eval_accuracy': 0.8733481811432814, 'eval_f1': 0.8725709238196842, 'eval_precision': 0.8716843615494978, 'eval_recall': 0.876776217056839, 'eval_runtime': 22.2173, 'eval_samples_per_second': 303.142}
loading pretrained/xlnet-base-cased-sst2-SIB...


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-6eefcfe7488fe508.arrow





ORIG for pretrained/xlnet-base-cased-sst2-SIB
{'eval_loss': 0.5446259379386902, 'eval_accuracy': 0.7711952487008167, 'eval_f1': 0.7503066303439334, 'eval_precision': 0.8077957336881756, 'eval_recall': 0.7471029296572937, 'eval_runtime': 35.8298, 'eval_samples_per_second': 187.972}
loading pretrained/roberta-base-sst2-INVSIB...


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-064c92ac365a3073.arrow





ORIG for pretrained/roberta-base-sst2-INVSIB
{'eval_loss': 0.4327160120010376, 'eval_accuracy': 0.8877505567928731, 'eval_f1': 0.8850032197634937, 'eval_precision': 0.8901211151736745, 'eval_recall': 0.8819792997233376, 'eval_runtime': 22.2233, 'eval_samples_per_second': 303.061}
loading pretrained/xlnet-base-cased-sst2-INVSIB...


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-6eefcfe7488fe508.arrow





ORIG for pretrained/xlnet-base-cased-sst2-INVSIB
{'eval_loss': 0.43277329206466675, 'eval_accuracy': 0.8791388270230142, 'eval_f1': 0.8787301019055664, 'eval_precision': 0.8794642739037457, 'eval_recall': 0.8851211973238784, 'eval_runtime': 36.202, 'eval_samples_per_second': 186.04}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSe

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-064c92ac365a3073.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
6819,0.4633,0.273262,0.890869,22.336,301.532
13638,0.4144,0.267735,0.838604,21.247,316.986
20457,0.3814,0.231108,0.870973,23.3968,287.86
27276,0.3599,0.215078,0.903935,22.4535,299.953
34095,0.3457,0.227574,0.871269,22.4715,299.713
40914,0.3309,0.254836,0.862955,22.1907,303.506
47733,0.322,0.259613,0.868003,22.3041,301.962
54552,0.3125,0.246281,0.870676,21.3888,314.885
61371,0.3028,0.270448,0.885523,21.2533,316.893
68190,0.2976,0.264055,0.8781,21.2036,317.634


ORIG for pretrained/roberta-base-sst2-TextMix
{'eval_loss': 0.21507763862609863, 'eval_accuracy': 0.9365998515219005, 'eval_f1': 0.9358419758916577, 'eval_precision': 0.9347856116255365, 'eval_recall': 0.9372067979906176, 'eval_runtime': 21.1, 'eval_samples_per_second': 319.195, 'epoch': 10.0}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summa

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-6eefcfe7488fe508.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
6819,0.4809,0.234487,0.882851,35.9548,187.319
13638,0.4137,0.209167,0.832962,35.0849,191.963
20457,0.3788,0.221713,0.832368,35.1312,191.71
27276,0.3583,0.232599,0.8098,35.1696,191.501
34095,0.3414,0.245572,0.794655,36.4427,184.811
40914,0.3274,0.235187,0.809206,35.8779,187.72
47733,0.3169,0.230513,0.905716,35.7518,188.382
54552,0.3074,0.236105,0.808909,36.0433,186.859
61371,0.3019,0.23889,0.854937,35.7082,188.612
68190,0.296,0.245208,0.833853,35.8991,187.609


ORIG for pretrained/xlnet-base-cased-sst2-TextMix
{'eval_loss': 0.20916734635829926, 'eval_accuracy': 0.9227913882702301, 'eval_f1': 0.9211524929068313, 'eval_precision': 0.9245043610782273, 'eval_recall': 0.918823094652339, 'eval_runtime': 35.889, 'eval_samples_per_second': 187.662, 'epoch': 10.0}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSe

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-064c92ac365a3073.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
13638,0.3335,0.253221,0.942836,21.4214,314.405
27276,0.3004,0.235788,0.867112,21.4115,314.551
40914,0.2778,0.236217,0.872754,21.3943,314.803
54552,0.2673,0.259751,0.832071,21.4295,314.286
68190,0.2576,0.230985,0.866815,21.4357,314.196
81828,0.2484,0.252609,0.860579,21.4606,313.831
95466,0.2414,0.240902,0.896511,21.381,314.999
109104,0.2335,0.235252,0.871269,21.4155,314.491
122742,0.2276,0.254446,0.881069,21.4456,314.051
136380,0.2225,0.268152,0.87216,21.6067,311.709


ORIG for pretrained/roberta-base-sst2-SentMix
{'eval_loss': 0.2309848666191101, 'eval_accuracy': 0.9432813659985152, 'eval_f1': 0.9423478932892954, 'eval_precision': 0.9430623964440308, 'eval_recall': 0.9417030543844978, 'eval_runtime': 21.3343, 'eval_samples_per_second': 315.689, 'epoch': 10.0}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summa

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-6eefcfe7488fe508.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
13638,0.3425,0.223196,0.834744,36.0451,186.849
27276,0.2982,0.21837,0.869488,36.0167,186.997
40914,0.2778,0.250564,0.828805,38.3104,175.801
54552,0.2687,0.245913,0.825241,36.6143,183.945
68190,0.2569,0.289887,0.778619,36.2298,185.897
81828,0.25,0.303658,0.78931,36.3767,185.146
95466,0.2388,0.26729,0.833259,36.4226,184.913
109104,0.2313,0.244405,0.824944,36.8588,182.724
122742,0.2246,0.258712,0.840089,36.6105,183.964
136380,0.2198,0.246216,0.834447,36.7331,183.349


ORIG for pretrained/xlnet-base-cased-sst2-SentMix
{'eval_loss': 0.2183699607849121, 'eval_accuracy': 0.9265033407572383, 'eval_f1': 0.9253194157019489, 'eval_precision': 0.9258408798287883, 'eval_recall': 0.9248389715948471, 'eval_runtime': 36.3848, 'eval_samples_per_second': 185.105, 'epoch': 10.0}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSe

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-064c92ac365a3073.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
20457,0.2783,0.247394,0.75902,21.7166,310.131
40914,0.263,0.294637,0.892353,21.8293,308.53
61371,0.4016,0.686402,0.0,21.8751,307.884
81828,0.4333,0.686664,0.0,21.9021,307.504
102285,0.4331,0.685777,0.0,21.9501,306.832
122742,0.433,0.685814,0.0,22.0288,305.736
143199,0.4329,0.686087,0.0,21.9537,306.782
163656,0.4329,0.685958,0.0,22.0668,305.21
184113,0.4329,0.686797,0.0,21.9756,306.476
204570,0.4328,0.686011,0.0,22.0094,306.006


ORIG for pretrained/roberta-base-sst2-WordMix
{'eval_loss': 0.24739353358745575, 'eval_accuracy': 0.9051224944320713, 'eval_f1': 0.9019036705220264, 'eval_precision': 0.9145815709443709, 'eval_recall': 0.896338501182832, 'eval_runtime': 22.1202, 'eval_samples_per_second': 304.472, 'epoch': 10.0}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summa

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-6eefcfe7488fe508.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
20457,0.2821,0.227467,0.83415,37.605,179.099
40914,0.2516,0.262218,0.869191,36.5762,184.136
61371,0.3317,0.686594,0.0,36.0159,187.001
81828,0.4331,0.685789,0.0,36.0204,186.977
102285,0.433,0.689924,0.0,36.1464,186.326
122742,0.4328,0.696592,0.0,36.1176,186.474
143199,0.4327,0.690185,0.0,35.8418,187.909
163656,0.4325,0.701639,0.0,36.0101,187.031
184113,0.4323,0.694983,0.0,36.1324,186.398
204570,0.4321,0.702332,0.0,36.0385,186.883


ORIG for pretrained/xlnet-base-cased-sst2-WordMix
{'eval_loss': 0.22746717929840088, 'eval_accuracy': 0.919524870081663, 'eval_f1': 0.9178309774898608, 'eval_precision': 0.9210469332345799, 'eval_recall': 0.9155809122985892, 'eval_runtime': 36.6575, 'eval_samples_per_second': 183.728, 'epoch': 10.0}
