In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback
from datasets import load_dataset, concatenate_datasets, Dataset
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import os

from utils import *

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

In [2]:
def one_hot_encode(y, nb_classes=2):
    if not isinstance(y, np.ndarray):
        y = np.expand_dims(np.array(y), 0)
    res = np.eye(nb_classes)[np.array(y).reshape(-1)]
    return res.reshape(list(y.shape)+[nb_classes])[0]

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=250)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=None)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1.mean(),
        'precision': precision.mean(),
        'recall': recall.mean()
    }

def acc_at_k(y_true, y_pred, k=2):
    y_pred = torch.tensor(y_pred) if type(y_pred) != torch.Tensor else y_pred
    y_true = torch.tensor(y_true) if type(y_true) != torch.Tensor else y_true
    total = len(y_true)
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    out_weights, out_idx = torch.topk(y_pred, k=k, dim=-1)
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    if acc.item() > 1:
        print(y_true.shape, y_true)
        print(y_pred.shape, y_pred)
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) Loss
    :param logits: (batch, *)
    :param target: (batch, *) same shape as logits, each item must be a valid distribution: target[i, :].sum() == 1.
    """
    logprobs = torch.nn.functional.log_softmax(logits.view(logits.shape[0], -1), dim=1)
    batchloss = - torch.sum(target.view(target.shape[0], -1) * logprobs, dim=1)
    if reduction == 'none':
        return batchloss
    elif reduction == 'mean':
        return torch.mean(batchloss)
    elif reduction == 'sum':
        return torch.sum(batchloss)
    else:
        raise NotImplementedError('Unsupported reduction mode.')

def compute_metrics_w_soft_target(pred):
    labels = pred.label_ids
    preds = pred.predictions
    acc = acc_at_k(labels, preds, k=2)
    return {
        'accuracy': acc,
    }

class Trainer_w_soft_target(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        loss = CEwST_loss(logits, labels)
        if return_outputs:
            return loss, outputs
        return loss
    
class DefaultCollator:
    def __init__(self):
        pass
    def __call__(self, batch):
        return torch.utils.data.dataloader.default_collate(batch)

In [3]:
# ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']
# ['ORIG', 'INV', 'SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']

In [4]:
MODEL_NAMES = ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']

In [None]:
use_pretrain = False

results = []
for MODEL_NAME in MODEL_NAMES:
    for t in ['ORIG', 'INV', 'SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']: 
        
        soft_target = False
        eval_only = False
        
        checkpoint = 'pretrained/' + MODEL_NAME + "-sst2-ORIG+" + t 
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        
        if t == 'ORIG':
            train_dataset = load_dataset('glue', 'sst2', split='train[:90%]')
            train_dataset.rename_column_('sentence', 'text')
        else: 
            
            # load custom data    
            text = npy_load("./assets/SST2/" + t + "/text.npy")
            label = npy_load("./assets/SST2/" + t + "/label.npy")
            if len(label.shape) > 1:
                df = pd.DataFrame({'text': text, 'label': label.tolist()})
                df.text = df.text.astype(str)
                df.label = df.label.map(lambda y: np.array(y))
            else:
                df = pd.DataFrame({'text': text, 'label': label})
                df.text = df.text.astype(str)
                df.label = df.label.astype(object)
            train_dataset = Dataset.from_pandas(df) 
            
            # load orig data
            orig_dataset = load_dataset('glue', 'sst2', split='train[:90%]')
            orig_dataset.remove_columns_(['idx'])
            orig_dataset.rename_column_('sentence', 'text')
            df = orig_dataset.to_pandas()
            df = df[df.columns[::-1]]
            df.text = df.text.astype(str)
            if len(label.shape) > 1:
                df.label = df.label.map(one_hot_encode)
            else:
                df.label = df.label.astype(object)
            orig_dataset = Dataset.from_pandas(df)
            
            # merge orig + custom data
            train_dataset = concatenate_datasets([orig_dataset, train_dataset])
            train_dataset.shuffle()
            
        if use_pretrain and os.path.exists(checkpoint):
            print('loading {}...'.format(checkpoint))
            MODEL_NAME = checkpoint
            eval_only = True
            
        # split to get train
        dataset_dict = train_dataset.train_test_split(
            test_size = 0.05,
            train_size = 0.95,
            shuffle = True
        )
        train_dataset = dataset_dict['train']
        eval_dataset = dataset_dict['test']
        test_dataset = load_dataset('glue', 'sst2', split='train[-10%:]')
        test_dataset.rename_column_('sentence', 'text')
        
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
            
        train_dataset = train_dataset.map(tokenize, batched=True, batch_size=len(train_dataset))
        eval_dataset = eval_dataset.map(tokenize, batched=True, batch_size=len(eval_dataset))
        test_dataset = test_dataset.map(tokenize, batched=True, batch_size=len(test_dataset))
        train_dataset.rename_column_('label', 'labels')
        eval_dataset.rename_column_('label', 'labels')
        test_dataset.rename_column_('label', 'labels')
        train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        eval_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        eval_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        
        if len(np.array(train_dataset['labels']).shape) > 1:
            soft_target = True
        
        train_batch_size = 8
        eval_batch_size = 32
        num_epoch = 10
        gradient_accumulation_steps=1
        max_steps = int((len(train_dataset) * num_epoch / gradient_accumulation_steps) / train_batch_size)

        training_args = TrainingArguments(
            seed=1,
            # adafactor=True,
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            # gradient_accumulation_steps=gradient_accumulation_steps, 
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=2000,
            logging_first_step=True,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            greater_is_better=True,
            evaluation_strategy="steps",
            # run_name=checkpoint
        )

        if soft_target:
            trainer = Trainer_w_soft_target(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics_w_soft_target,
                train_dataset=train_dataset,
                eval_dataset=eval_dataset,
                data_collator=DefaultCollator(),
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )
        else: 
            trainer = Trainer(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics,
                train_dataset=train_dataset,
                eval_dataset=eval_dataset,
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )

        if not eval_only:
            trainer.train()
        
        trainer.compute_metrics = compute_metrics
            
        # test ORIG
        trainer.eval_dataset = test_dataset
        out = trainer.evaluate()
        out['run'] = checkpoint
        print('ORIG for {}\n{}'.format(checkpoint, out))   
        
        results.append(out)

Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
rename_column_ is deprecated and will be removed in the next major version of datasets. Use the dataset.rename_column method instead.
Loading cached split indices for dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-32a6a23548aa6274.arrow and C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-0d3572f739d46d53.arrow
Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.trans

Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
2000,0.4151,0.306279,0.906631,0.906223,0.905615,0.91037,10.2628,295.337
4000,0.3102,0.278534,0.923788,0.922177,0.927539,0.919077,10.216,296.691
6000,0.3037,0.265617,0.926097,0.925642,0.92446,0.928716,10.2052,297.006
8000,0.2754,0.332547,0.932036,0.930813,0.934033,0.928637,10.1823,297.673
10000,0.2426,0.283571,0.933685,0.93288,0.932848,0.932912,10.1891,297.475
12000,0.2573,0.262476,0.927747,0.926963,0.92644,0.927561,10.2077,296.932
14000,0.2551,0.322876,0.935335,0.934308,0.936209,0.932857,10.1743,297.908
16000,0.1981,0.288539,0.930716,0.929392,0.93336,0.926861,10.1724,297.964
18000,0.1965,0.284688,0.937314,0.936163,0.939669,0.933833,10.1648,298.187
20000,0.2014,0.308018,0.938304,0.937298,0.939472,0.935679,10.154,298.503


ORIG for pretrained/bert-base-uncased-sst2-ORIG+ORIG
{'eval_loss': 0.34084874391555786, 'eval_accuracy': 0.9437268002969562, 'eval_f1': 0.9428203203051284, 'eval_precision': 0.9433627980381265, 'eval_recall': 0.9423196646542293, 'eval_runtime': 22.9149, 'eval_samples_per_second': 293.914, 'epoch': 10.0, 'run': 'pretrained/bert-base-uncased-sst2-ORIG+ORIG'}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
remove_columns_ is deprecated and will be removed in the next major version of datasets. Use the dataset.remove_columns method instead.
Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architectu

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-80412efd19620de1.arrow





Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
2000,0.5198,0.40699,0.827945,0.82094,0.841553,0.816177,91.1436,66.51
4000,0.3838,0.321684,0.857638,0.854407,0.860959,0.851381,91.3309,66.374
6000,0.3609,0.407728,0.865556,0.86346,0.865522,0.862047,91.2161,66.458
8000,0.3587,0.327549,0.86935,0.868234,0.8675,0.869261,91.2236,66.452
10000,0.3675,0.322734,0.86836,0.865325,0.872318,0.8621,90.9934,66.62
12000,0.3656,0.331175,0.858628,0.856278,0.858877,0.854622,91.5599,66.208
14000,0.3966,0.463466,0.836688,0.829699,0.852733,0.82451,91.1069,66.537
16000,0.3751,0.48412,0.84708,0.845307,0.845472,0.845151,90.9295,66.667
18000,0.3867,0.399292,0.855823,0.854787,0.853915,0.856388,91.3475,66.362
20000,0.3693,0.391869,0.867865,0.865806,0.867879,0.864382,91.0579,66.573


ORIG for pretrained/bert-base-uncased-sst2-ORIG+INV
{'eval_loss': 0.22934286296367645, 'eval_accuracy': 0.9272457312546399, 'eval_f1': 0.9265553141535987, 'eval_precision': 0.9250828171225598, 'eval_recall': 0.929157122983028, 'eval_runtime': 23.1228, 'eval_samples_per_second': 291.271, 'epoch': 1.94, 'run': 'pretrained/bert-base-uncased-sst2-ORIG+INV'}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initiali

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-80412efd19620de1.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.5822,0.483918,0.757427,58.2604,104.05
4000,0.4937,0.46572,0.810361,58.843,103.02
6000,0.4771,0.458156,0.811933,58.3418,103.905
8000,0.4695,0.463236,0.831102,57.5861,105.268
10000,0.4754,0.524379,0.838256,56.8881,106.56
12000,0.4574,0.457948,0.832105,56.4089,107.465
14000,0.4668,0.469572,0.83495,56.0521,108.149
16000,0.4407,0.487929,0.81731,56.4924,107.306
18000,0.4454,0.437296,0.822175,56.6175,107.069
20000,0.4394,0.450881,0.852468,57.5728,105.293


ORIG for pretrained/bert-base-uncased-sst2-ORIG+SIB
{'eval_loss': 3.8303322792053223, 'eval_accuracy': 0.940014847809948, 'eval_f1': 0.9392432285855458, 'eval_precision': 0.938451741012869, 'eval_recall': 0.9401792273357352, 'eval_runtime': 22.9028, 'eval_samples_per_second': 294.068, 'epoch': 9.72, 'run': 'pretrained/bert-base-uncased-sst2-ORIG+SIB'}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initiali

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-80412efd19620de1.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.5626,0.459765,0.812479,92.9791,65.197
4000,0.4623,0.423294,0.832323,93.7305,64.675
6000,0.4408,0.453584,0.842464,92.5233,65.519
8000,0.4439,0.452885,0.846708,92.647,65.431
10000,0.4449,0.440391,0.850654,92.4383,65.579
12000,0.43,0.435332,0.843656,92.7444,65.362
14000,0.4455,0.468316,0.851173,92.2597,65.706
16000,0.4265,0.427456,0.861764,92.6479,65.43
18000,0.423,0.426851,0.861085,93.078,65.128
20000,0.413,0.450499,0.856776,93.3702,64.924


ORIG for pretrained/bert-base-uncased-sst2-ORIG+INVSIB
{'eval_loss': 2.4658586978912354, 'eval_accuracy': 0.9349665924276169, 'eval_f1': 0.9340570007822142, 'eval_precision': 0.9336723618090452, 'eval_recall': 0.9344706039030594, 'eval_runtime': 22.9495, 'eval_samples_per_second': 293.47, 'epoch': 3.33, 'run': 'pretrained/bert-base-uncased-sst2-ORIG+INVSIB'}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initiali

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-80412efd19620de1.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.4999,0.383299,0.851534,30.6412,197.838
4000,0.3718,0.316063,0.887001,30.7011,197.452
6000,0.3299,0.338949,0.902507,30.7232,197.31
8000,0.3261,0.301207,0.910261,30.7459,197.164
10000,0.3238,0.377753,0.900033,30.8336,196.603
12000,0.3067,0.317735,0.910426,30.7919,196.87
14000,0.3222,0.293654,0.926097,30.8206,196.687
16000,0.2929,0.329496,0.913725,30.7893,196.887
18000,0.2923,0.315154,0.922963,30.7473,197.156
20000,0.2785,0.290189,0.922963,30.8483,196.51


ORIG for pretrained/bert-base-uncased-sst2-ORIG+TextMix
{'eval_loss': 3.459319591522217, 'eval_accuracy': 0.9459539717891611, 'eval_f1': 0.9451746127107483, 'eval_precision': 0.9449550204900994, 'eval_recall': 0.9454027160028868, 'eval_runtime': 22.8649, 'eval_samples_per_second': 294.557, 'epoch': 5.42, 'run': 'pretrained/bert-base-uncased-sst2-ORIG+TextMix'}


Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Reusing dataset glue (C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initiali

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\Fabrice\.cache\huggingface\datasets\glue\sst2\1.0.0\7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4\cache-80412efd19620de1.arrow





Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.474,0.338679,0.717515,45.7787,198.63
4000,0.3313,0.307838,0.74625,45.9423,197.922
6000,0.3008,0.270661,0.76915,47.9396,189.676
8000,0.2836,0.261724,0.784238,46.0773,197.342
10000,0.273,0.253895,0.786836,45.9574,197.857
12000,0.2672,0.248588,0.792271,46.0315,197.539
14000,0.266,0.252973,0.79702,46.0851,197.309
16000,0.2712,0.240353,0.791805,46.5117,195.499


In [None]:
df = pd.DataFrame(results)
df

In [None]:
df.to_csv('train_SST2_r3.csv')

In [None]:
df.to_clipboard(excel=True)