# Targeted SIB Training

In [1]:
# Reload the autoreload extension and set mode 2 so edits to imported modules
# (e.g. the local `transforms` package) are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
from transformers import (
    AutoModelForSequenceClassification, 
    AutoTokenizer, 
    Trainer, 
    TrainingArguments, 
    TrainerCallback, 
    EarlyStoppingCallback
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers.trainer_callback import TrainerControl
from datasets import load_dataset
import torch
import pandas as pd
from torch.utils.data import DataLoader
from transforms import TextMix, SentMix, WordMix, SibylCollator

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
def tokenize_fn(text):
    """
    Encode raw text with the module-level ``tokenizer`` and return PyTorch tensors.

    :param text: the text (or batch of texts) forwarded to ``tokenizer``.
    :return: whatever ``tokenizer`` returns for padded, truncated (max 250 tokens) input.
    """
    encode_options = dict(padding=True, truncation=True, max_length=250, return_tensors='pt')
    return tokenizer(text, **encode_options)

def tokenize(batch):
    """
    Encode the ``'text'`` entry of a batch with the module-level ``tokenizer``.

    Unlike ``tokenize_fn`` no ``return_tensors`` is requested, so the result is
    left in the tokenizer's default container.

    :param batch: mapping with a ``'text'`` key.
    :return: whatever ``tokenizer`` returns for padded, truncated (max 250 tokens) input.
    """
    encode_options = dict(padding=True, truncation=True, max_length=250)
    texts = batch['text']
    return tokenizer(texts, **encode_options)

def acc_at_k(y_true, y_pred, k=2):
    """
    Weighted top-k agreement between soft targets and predicted scores.

    For every row the ``k`` largest entries of ``y_true`` and of ``y_pred`` are
    taken along the last dimension. A rank position counts as a hit when the
    class index found at that rank is the same in both, and each hit is
    credited with the true weight at that rank. The total credit is divided by
    the number of rows (``len(y_true)``).

    :param y_true: target weights; a tensor or anything ``torch.as_tensor``
        accepts (nested list, NumPy array).
    :param y_pred: predicted scores/logits, same container rules as ``y_true``.
    :param k: number of top ranks compared per row.
    :return: the score as a Python float.
    """
    # as_tensor passes existing tensors (including Tensor subclasses) through
    # untouched and converts everything else, replacing the exact-type check
    # that re-wrapped subclasses with a copy.
    y_true = torch.as_tensor(y_true)
    y_pred = torch.as_tensor(y_pred)
    total = len(y_true)
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    _, out_idx = torch.topk(y_pred, k=k, dim=-1)
    # Rank-by-rank comparison: a class only scores if it sits at the same rank
    # in both the target and the prediction.
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) Loss
    :param logits: (batch, *) raw scores; trailing dimensions are flattened per example.
    :param target: (batch, *) same shape as logits, each item must be a valid distribution: target[i, :].sum() == 1.
    :param reduction: 'none' for one loss per example, 'mean' or 'sum' to reduce over the batch.
    :raises NotImplementedError: for any other reduction value.
    """
    flat_logits = logits.view(logits.shape[0], -1)
    log_probs = torch.nn.functional.log_softmax(flat_logits, dim=1)
    flat_target = target.view(target.shape[0], -1)
    per_example = -(flat_target * log_probs).sum(dim=1)

    if reduction == 'none':
        return per_example
    if reduction == 'mean':
        return per_example.mean()
    if reduction == 'sum':
        return per_example.sum()
    raise NotImplementedError('Unsupported reduction mode.')
        
def compute_metrics(pred):
    """
    Accuracy metric handed to the Trainer.

    ``pred`` unpacks into ``(predictions, labels)``. Labels with more than one
    dimension are scored with ``acc_at_k`` (k=2); one-dimensional labels are
    scored with plain accuracy against the argmax of the predictions.

    :return: dict with a single ``'accuracy'`` entry.
    """
    scores, gold = pred
    multi_dim_labels = len(gold.shape) > 1
    if multi_dim_labels:
        value = acc_at_k(gold, scores, k=2)
    else:
        value = accuracy_score(gold, scores.argmax(-1))
    return { 'accuracy': value }

class TargetedTrainer(Trainer):
    """
    Trainer whose loss depends on the shape of the labels: labels with more than
    one dimension go through ``CEwST_loss``, one-dimensional labels through the
    standard cross entropy.
    """
    def compute_loss(self, model, inputs, return_outputs=False):
        """
        :param model: the model being trained; called with the remaining inputs.
        :param inputs: batch mapping; its ``"labels"`` entry is removed before the forward pass.
        :param return_outputs: when true, return ``(loss, outputs)`` instead of just the loss.
        """
        targets = inputs.pop("labels")
        model_out = model(**inputs)
        # With the labels removed from the inputs, the first output is read as the logits.
        scores = model_out[0]
        loss_fn = CEwST_loss if len(targets.shape) > 1 else torch.nn.functional.cross_entropy
        loss = loss_fn(scores, targets)
        return (loss, model_out) if return_outputs else loss

class TargetedMixturesCallback(TrainerCallback):
    """
    A callback that calculates a confusion matrix on the validation
    data and returns the most confused class pairings.

    After every evaluation the pairings are printed and stored on the
    ``control`` object as ``control.new_targets``.
    """
    def __init__(self, dataloader, device):
        """
        :param dataloader: iterable of batches exposing ``'text'`` and ``'label'``
            entries, with a ``dataset`` attribute that supports ``dataset['label']``.
        :param device: device the tokenized inputs are moved to before the forward pass.
        """
        self.dataloader = dataloader
        self.device = device

    def on_evaluate(self, args, state, control, model, tokenizer, **kwargs):
        """
        Recompute the most confused class pairs and attach them to ``control``.

        :return: the same ``control`` object that was passed in, updated in place.
        """
        cnf_mat = self.get_confusion_matrix(model, tokenizer, self.dataloader)
        new_targets = self.get_most_confused_per_class(cnf_mat)
        print("New targets:", new_targets)
        # Update the instance handed in by the Trainer. Rebinding `control` to
        # the TrainerControl class would write these attributes on the class
        # itself (shared by every control object) and return a class, not an instance.
        control.new_targets = new_targets
        # Only ever request a stop; never reset the flag to False, so a stop
        # already requested elsewhere on this control object is not cancelled.
        if state.global_step >= state.max_steps:
            control.should_training_stop = True
        return control

    def get_confusion_matrix(self, model, tokenizer, dataloader, normalize=True):
        """
        Build an ``n_classes x n_classes`` matrix indexed ``[true, predicted]``.

        :param model: model called as ``model(input_ids, attention_mask=...)``; its ``.logits`` are used.
        :param tokenizer: callable used to encode each batch's ``'text'``.
        :param dataloader: batches to evaluate; ``dataloader.dataset['label']`` fixes the class count.
        :param normalize: when true, divide every column (predicted class) by its total.
        :return: the confusion matrix as a CPU float tensor.
        """
        n_classes = max(dataloader.dataset['label']) + 1
        confusion_matrix = torch.zeros(n_classes, n_classes)
        with torch.no_grad():
            # Iterate the dataloader that was passed in (the same one used for
            # n_classes above) rather than silently falling back to self.dataloader.
            for batch in dataloader:
                data, targets = batch['text'], batch['label']
                data = tokenizer(data, padding=True, truncation=True, max_length=250, return_tensors='pt')
                input_ids = data['input_ids'].to(self.device)
                attention_mask = data['attention_mask'].to(self.device)
                outputs = model(input_ids, attention_mask=attention_mask).logits
                preds = torch.argmax(outputs, dim=1).cpu()
                # Targets are only used as indices into a CPU matrix, so they
                # stay on the CPU and are read as plain ints.
                for t, p in zip(targets.view(-1).tolist(), preds.view(-1).tolist()):
                    confusion_matrix[int(t), int(p)] += 1
        if normalize:
            # A class that was never predicted has a zero column total; clamping
            # keeps that column at 0 instead of turning it into NaN. Non-empty
            # columns hold counts >= 1 and are unaffected by the clamp.
            column_totals = confusion_matrix.sum(dim=0).clamp(min=1)
            confusion_matrix = confusion_matrix / column_totals
        return confusion_matrix

    def get_most_confused_per_class(self, confusion_matrix):
        """
        For each row (true class) return the off-diagonal column with the largest value.

        :param confusion_matrix: square tensor indexed ``[true, predicted]``; left unmodified.
        :return: list of ``[class, most_confused_class]`` pairs, one per row.
        """
        # Work on a copy so zeroing the diagonal does not alter the caller's matrix.
        off_diagonal = confusion_matrix.clone().fill_diagonal_(0)
        idx = torch.arange(len(off_diagonal))
        cnf = off_diagonal.max(dim=1)[1]
        return torch.stack((idx, cnf)).T.tolist()

class TargetedMixturesCollator:
    """
    Collator that optionally applies a mixing transform to a batch of
    ``{'text', 'label'}`` examples, then tokenizes the text and attaches the labels.
    """
    def __init__(self, 
                 tokenize_fn, 
                 transform, 
                 transform_prob=1.0, 
                 target_pairs=None, 
                 target_prob=1.0, 
                 num_classes=2):
        """
        :param tokenize_fn: callable mapping a list of texts to a mutable mapping of model inputs.
        :param transform: callable invoked as
            ``transform((texts, labels), target_pairs, target_prob, num_classes)``
            and returning a ``(texts, labels)`` pair.
        :param transform_prob: probability that a batch is passed through ``transform``.
        :param target_pairs: class pairs forwarded to ``transform``; ``None`` means a
            fresh empty list. A list passed by the caller is stored as-is (not copied).
        :param target_prob: forwarded to ``transform`` unchanged.
        :param num_classes: forwarded to ``transform`` and used as the one-hot width.
        """
        self.tokenize_fn = tokenize_fn
        self.transform = transform
        self.transform_prob = transform_prob
        # A `[]` default would be one list shared by every instance created
        # without an explicit argument; build a new list per instance instead.
        self.target_pairs = [] if target_pairs is None else target_pairs
        self.target_prob = target_prob
        self.num_classes = num_classes
        print("TargetedMixturesCollator initialized with {}".format(transform.__class__.__name__))
        
    def __call__(self, batch):
        """
        :param batch: list of examples, each with ``'text'`` and ``'label'`` entries.
        :return: the mapping produced by ``tokenize_fn`` with a ``'labels'`` entry added.
        """
        text = [x['text'] for x in batch]
        labels = [x['label'] for x in batch]
        batch = (text, labels)
        if torch.rand(1) < self.transform_prob:
            batch = self.transform(
                batch, 
                self.target_pairs,   
                self.target_prob,
                self.num_classes
            )
        text, labels = batch
        # as_tensor converts lists and passes through labels that the transform
        # already returned as a tensor, without an extra copy.
        labels = torch.as_tensor(labels)
        # One-dimensional labels are class indices; expand them to one-hot rows
        # so every batch carries labels of the same rank.
        if len(labels.shape) == 1:
            labels = torch.nn.functional.one_hot(labels, num_classes=self.num_classes)
        batch = self.tokenize_fn(text)
        batch['labels'] = labels
        batch.pop('idx', None)
        batch.pop('label', None)
        return batch
    
class DefaultCollator:
    """Stateless collator that defers to PyTorch's ``default_collate``."""

    def __call__(self, batch):
        """Collate ``batch`` with ``torch.utils.data.dataloader.default_collate``."""
        collated = torch.utils.data.dataloader.default_collate(batch)
        return collated

In [4]:
# Pretrained checkpoints to fine-tune; each one is trained once per entry in `ts`.
MODEL_NAMES = ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']
# Full list of augmentation settings, kept for reference ('ORIG' and 'INV' are not part of this run):
# ts = ['ORIG', 'INV', 'SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']
# Augmentation settings actually iterated by the training loop.
ts = ['SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']

In [5]:
results = []

# Seed for the train/validation split, so every run (and every re-run of this
# cell) trains and validates on the same examples and results stay comparable.
SPLIT_SEED = 42

# Optimisation settings shared by every run.
train_batch_size = 6
eval_batch_size  = 32
num_epoch = 20
gradient_accumulation_steps = 1

# The data depends on neither the model nor the transform, so it is loaded and
# split once instead of once per (model, transform) pair.
dataset = load_dataset('ag_news', split='train') 
dataset_dict = dataset.train_test_split(
    test_size = 0.05,
    train_size = 0.95,
    shuffle = True,
    seed = SPLIT_SEED
)
train_dataset = dataset_dict['train']
eval_dataset = dataset_dict['test']
max_steps = int((len(train_dataset) * num_epoch / gradient_accumulation_steps) / train_batch_size)

raw_test_dataset = load_dataset('ag_news', split='test') 
# TargetedTrainer.compute_loss reads the targets from a "labels" entry.
# Prefer the non-mutating rename when the installed `datasets` offers it and
# fall back to the in-place variant otherwise.
if hasattr(raw_test_dataset, 'rename_column'):
    raw_test_dataset = raw_test_dataset.rename_column('label', 'labels')
else:
    raw_test_dataset.rename_column_('label', 'labels')

for MODEL_NAME in MODEL_NAMES:

    # `tokenizer` is the module-level name read by tokenize() and tokenize_fn().
    # It only depends on the model, so it (and the tokenized test set) is
    # prepared once per model rather than once per transform.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    test_dataset = raw_test_dataset.map(tokenize, batched=True, batch_size=len(raw_test_dataset))
    test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        
    for t in ts: 
        
        # Defaults; overridden below according to the augmentation setting.
        transform = None
        num_sampled_INV = 0
        num_sampled_SIB = 0
        label_type = "soft"
        
        if t == "INV":
            num_sampled_INV = 2
            label_type = "hard"
        elif t == "SIB":
            num_sampled_SIB = 2
        elif t == 'INVSIB':
            num_sampled_INV = 1
            num_sampled_SIB = 1
            label_type = None
        elif t == "TextMix":
            transform = TextMix()
        elif t == "SentMix":
            transform = SentMix()
        elif t == "WordMix":
            transform = WordMix()
        
        checkpoint = './results/' + MODEL_NAME + '-SibylCollator-' + t
        
        # NOTE(review): the classification head has 5 outputs while the collator
        # below is configured with num_classes=4 -- confirm which is intended.
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=5).to(device)

#         tmcb = TargetedMixturesCallback(
#             dataloader=DataLoader(eval_dataset, batch_size=32),
#             device=device
#         )
        # A new early-stopping callback is created for every run.
        escb = EarlyStoppingCallback(
            early_stopping_patience=10
        )
#         tmc = TargetedMixturesCollator(
#             tokenize_fn=tokenize_fn, 
#             transform=t, 
#             transform_prob=0.5,
#             target_pairs=[],
#             target_prob=0.5,
#             num_classes=4
#         )
        sibyl_collator = SibylCollator( 
            tokenize_fn=tokenize_fn, 
            transform=transform, 
            num_sampled_INV=num_sampled_INV, 
            num_sampled_SIB=num_sampled_SIB, 
            task_type="topic", 
            tran_type=None, 
            label_type=label_type,
            one_hot=label_type == "soft",
            transform_prob=0.5,
            target_pairs=[],
            target_prob=0.0,
            reduce_mixed=True,
            num_classes=4
        )

        training_args = TrainingArguments(
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps, 
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=5000,
            logging_first_step=True,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            greater_is_better=True,
            evaluation_strategy="steps",
            remove_unused_columns=False
        )

        trainer = TargetedTrainer(
            model=model, 
            tokenizer=tokenizer,
            args=training_args,
            compute_metrics=compute_metrics,                  
            train_dataset=train_dataset,         
            eval_dataset=eval_dataset,
            data_collator=sibyl_collator if t != "ORIG" else DefaultCollator(),
            callbacks=[escb] # [tmcb, escb]
        )

        trainer.train()

        # test with ORIG data: swap in the pre-tokenized test set and the plain
        # collator so no transform is applied at test time
        trainer.eval_dataset = test_dataset
        trainer.data_collator = DefaultCollator()
        # trainer.remove_callback(tmcb)

        out_orig = trainer.evaluate()
        out_orig['run'] = checkpoint
        out_orig['test'] = "ORIG"
        print('ORIG for {}\n{}'.format(checkpoint, out_orig))

        results.append(out_orig)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with num_sampled_INV=0 and num_sampled_SIB=2


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6929,0.408098,0.8975,12.086,99.289
10000,0.3718,0.363069,0.916667,11.5174,104.19
15000,0.3552,0.404053,0.9075,9.9742,120.31
20000,0.3711,0.342812,0.9225,10.2541,117.027
25000,0.3466,0.361725,0.92,11.0256,108.837
30000,0.3612,0.370168,0.909167,11.1187,107.927
35000,0.3605,0.475786,0.905,10.4121,115.25
40000,0.3828,0.496682,0.8875,11.7978,101.713
45000,0.3652,0.412427,0.919167,10.352,115.92
50000,0.3688,0.576396,0.905,11.6306,103.176


ORIG for ./results/bert-base-uncased-SibylCollator-SIB
{'eval_loss': 0.2793702483177185, 'eval_accuracy': 0.9313157894736842, 'eval_runtime': 113.758, 'eval_samples_per_second': 66.808, 'epoch': 3.54, 'run': './results/bert-base-uncased-SibylCollator-SIB', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with num_sampled_INV=1 and num_sampled_SIB=1


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.7051,0.395702,0.89,13.7399,87.337
10000,0.3852,0.398082,0.905,14.6251,82.051
15000,0.3755,0.303254,0.924167,11.8243,101.486
20000,0.3629,0.354248,0.913333,14.4039,83.311
25000,0.3471,0.413023,0.910833,12.2732,97.774
30000,0.3624,0.455974,0.910833,13.4382,89.298
35000,0.3711,0.372374,0.9125,11.8204,101.519
40000,0.3863,0.417169,0.910833,14.5425,82.517
45000,0.3816,0.352229,0.920833,12.3423,97.226
50000,0.3891,0.303002,0.918333,14.8559,80.776


ORIG for ./results/bert-base-uncased-SibylCollator-INVSIB
{'eval_loss': 0.29319846630096436, 'eval_accuracy': 0.9277631578947368, 'eval_runtime': 112.877, 'eval_samples_per_second': 67.33, 'epoch': 3.28, 'run': './results/bert-base-uncased-SibylCollator-INVSIB', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6819,0.34969,0.91,10.1924,117.735
10000,0.3673,0.405158,0.9075,10.3141,116.345
15000,0.331,0.271779,0.923333,9.5119,126.158
20000,0.3277,0.321811,0.931667,9.1261,131.491
25000,0.3099,0.354268,0.921667,9.8516,121.808
30000,0.32,0.347446,0.920833,9.888,121.359
35000,0.3589,0.399166,0.910833,9.2085,130.314
40000,0.3718,0.407695,0.914167,9.7824,122.669
45000,0.3526,0.376396,0.9175,9.2327,129.973
50000,0.3888,0.439017,0.905,9.9549,120.544


ORIG for ./results/bert-base-uncased-SibylCollator-TextMix
{'eval_loss': 0.3259882926940918, 'eval_accuracy': 0.9278947368421052, 'eval_runtime': 113.6751, 'eval_samples_per_second': 66.857, 'epoch': 3.54, 'run': './results/bert-base-uncased-SibylCollator-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6919,0.392575,0.905833,10.5647,113.585
10000,0.3697,0.363633,0.909167,10.3836,115.567
15000,0.3427,0.366146,0.926667,9.6503,124.348
20000,0.3423,0.304321,0.924167,9.6076,124.901
25000,0.3232,0.348876,0.920833,9.7225,123.425
30000,0.3301,0.407518,0.9075,10.0118,119.858
35000,0.3424,0.360936,0.9225,9.4922,126.419
40000,0.3736,0.478535,0.898333,10.4873,114.424
45000,0.3473,0.361293,0.925833,9.7991,122.461
50000,0.3494,0.551624,0.894167,10.3664,115.758


ORIG for ./results/bert-base-uncased-SibylCollator-SentMix
{'eval_loss': 0.32576125860214233, 'eval_accuracy': 0.925, 'eval_runtime': 113.0304, 'eval_samples_per_second': 67.239, 'epoch': 3.28, 'run': './results/bert-base-uncased-SibylCollator-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.7111,0.386591,0.875,10.1493,118.235
10000,0.413,0.382369,0.900833,10.3273,116.197
15000,0.3952,0.271895,0.926667,9.9611,120.469
20000,0.3886,0.398223,0.894167,9.5484,125.675
25000,0.3903,0.374484,0.904167,9.8569,121.743
30000,0.3909,0.424645,0.913333,10.1183,118.597
35000,0.4037,0.35544,0.9175,9.3058,128.952
40000,0.4442,0.508034,0.899167,10.1702,117.991
45000,0.4065,0.468702,0.896667,9.3509,128.33
50000,0.428,0.376516,0.908333,10.38,115.607


ORIG for ./results/bert-base-uncased-SibylCollator-WordMix
{'eval_loss': 0.2749074697494507, 'eval_accuracy': 0.9261842105263158, 'eval_runtime': 112.7674, 'eval_samples_per_second': 67.395, 'epoch': 3.28, 'run': './results/bert-base-uncased-SibylCollator-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with num_sampled_INV=0 and num_sampled_SIB=2


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6728,0.427227,0.8925,11.8771,101.035
10000,0.4029,0.392916,0.906667,11.2572,106.598
15000,0.3691,0.371712,0.920833,10.1926,117.733
20000,0.3813,0.383402,0.9175,9.8338,122.028
25000,0.373,0.392763,0.915,10.413,115.24
30000,0.3877,0.354645,0.915,10.6175,113.021
35000,0.4024,0.383187,0.9175,9.6762,124.016
40000,0.4248,0.392008,0.910833,11.5331,104.048
45000,0.4399,0.3894,0.911667,9.5661,125.443
50000,0.4439,0.530887,0.895833,11.6629,102.89


ORIG for ./results/roberta-base-SibylCollator-SIB
{'eval_loss': 0.3537070155143738, 'eval_accuracy': 0.9221052631578948, 'eval_runtime': 110.1731, 'eval_samples_per_second': 68.982, 'epoch': 3.28, 'run': './results/roberta-base-SibylCollator-SIB', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with num_sampled_INV=1 and num_sampled_SIB=1


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6699,0.372203,0.9075,14.6061,82.157
10000,0.4228,0.35527,0.914167,14.7082,81.587
15000,0.3889,0.3492,0.918333,11.6084,103.374
20000,0.4004,0.339994,0.920833,11.6456,103.043
25000,0.3934,0.377006,0.921667,11.9603,100.332
30000,0.4016,0.327424,0.9075,14.9677,80.172
35000,0.4188,0.347402,0.9175,11.5943,103.499
40000,0.4433,0.410775,0.906667,12.7911,93.815
45000,0.4291,0.370319,0.923333,11.9069,100.782
50000,0.4613,0.640442,0.8575,13.2093,90.845


ORIG for ./results/roberta-base-SibylCollator-INVSIB
{'eval_loss': 0.35579848289489746, 'eval_accuracy': 0.9282894736842106, 'eval_runtime': 107.7642, 'eval_samples_per_second': 70.524, 'epoch': 4.8, 'run': './results/roberta-base-SibylCollator-INVSIB', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6608,0.479483,0.905833,10.3372,116.085
10000,0.3867,0.388258,0.914167,10.1135,118.653
15000,0.3509,0.309474,0.924167,9.5179,126.078
20000,0.3467,0.378926,0.9175,9.489,126.462
25000,0.332,0.434968,0.913333,9.7329,123.294
30000,0.3438,0.313707,0.921667,10.0859,118.978
35000,0.3657,0.342437,0.915833,9.0648,132.379
40000,0.3906,0.441295,0.910833,10.1396,118.348
45000,0.3916,0.479449,0.898333,9.3109,128.882
50000,0.391,0.448701,0.899167,10.3478,115.967


ORIG for ./results/roberta-base-SibylCollator-TextMix
{'eval_loss': 0.27471262216567993, 'eval_accuracy': 0.9311842105263158, 'eval_runtime': 109.032, 'eval_samples_per_second': 69.704, 'epoch': 3.28, 'run': './results/roberta-base-SibylCollator-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6777,0.420288,0.91,9.0818,132.132
10000,0.4078,0.421195,0.918333,9.1653,130.928
15000,0.3738,0.309274,0.925,8.4743,141.604
20000,0.3698,0.389796,0.9225,8.5386,140.538
25000,0.3486,0.45468,0.914167,8.7396,137.306
30000,0.369,0.385515,0.914167,9.0918,131.987
35000,0.3828,0.357522,0.924167,9.0586,132.471
40000,0.4066,0.433461,0.920833,9.376,127.986
45000,0.3992,0.489935,0.909167,8.3827,143.152
50000,0.4086,0.479182,0.913333,8.9481,134.106


ORIG for ./results/roberta-base-SibylCollator-SentMix
{'eval_loss': 0.3329855799674988, 'eval_accuracy': 0.921578947368421, 'eval_runtime': 110.017, 'eval_samples_per_second': 69.08, 'epoch': 3.28, 'run': './results/roberta-base-SibylCollator-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6743,0.445255,0.888333,9.9056,121.144
10000,0.4383,0.43903,0.903333,10.0968,118.849
15000,0.4212,0.288345,0.92,9.6607,124.215
20000,0.4169,0.312353,0.924167,9.19,130.577
25000,0.4175,0.365795,0.915,9.5534,125.609
30000,0.4197,0.566723,0.889167,9.6966,123.755
35000,0.4494,0.519074,0.889167,8.9312,134.36
40000,0.4843,0.403719,0.9025,9.8243,122.146
45000,0.4927,0.540749,0.870833,9.0135,133.134
50000,0.5226,0.376355,0.910833,9.9316,120.826


ORIG for ./results/roberta-base-SibylCollator-WordMix
{'eval_loss': 0.2905588448047638, 'eval_accuracy': 0.9277631578947368, 'eval_runtime': 111.5945, 'eval_samples_per_second': 68.104, 'epoch': 3.54, 'run': './results/roberta-base-SibylCollator-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with num_sampled_INV=0 and num_sampled_SIB=2


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6679,0.467838,0.895833,28.6629,41.866
10000,0.4103,0.402469,0.915833,26.2813,45.66
15000,0.371,0.421212,0.9125,23.4243,51.229
20000,0.373,0.385358,0.918333,22.2818,53.856
25000,0.3509,0.394391,0.918333,24.7757,48.434
30000,0.3735,0.488422,0.905,26.3265,45.581
35000,0.3864,0.361023,0.9175,21.6649,55.389
40000,0.4372,0.501662,0.8975,27.6016,43.476
45000,0.4807,0.621925,0.8725,22.4648,53.417
50000,0.7052,1.264954,0.459167,26.2633,45.691


ORIG for ./results/xlnet-base-cased-SibylCollator-SIB
{'eval_loss': 0.2926712930202484, 'eval_accuracy': 0.9301315789473684, 'eval_runtime': 241.974, 'eval_samples_per_second': 31.408, 'epoch': 3.54, 'run': './results/xlnet-base-cased-SibylCollator-SIB', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with num_sampled_INV=1 and num_sampled_SIB=1


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6723,0.472401,0.8925,28.6355,41.906
10000,0.423,0.426812,0.913333,26.5879,45.133
15000,0.4071,0.396017,0.911667,22.4895,53.358
20000,0.3939,0.373974,0.92,21.9303,54.719
25000,0.3854,0.376855,0.920833,25.9351,46.269
30000,0.3897,0.379746,0.9075,25.4608,47.131
35000,0.4163,0.399317,0.919167,22.3643,53.657
40000,0.4762,0.552003,0.893333,28.1794,42.584
45000,0.503,0.560435,0.866667,24.1698,49.649
50000,0.5464,0.690042,0.845833,26.1532,45.883


ORIG for ./results/xlnet-base-cased-SibylCollator-INVSIB
{'eval_loss': 0.35648804903030396, 'eval_accuracy': 0.9244736842105263, 'eval_runtime': 242.7723, 'eval_samples_per_second': 31.305, 'epoch': 3.79, 'run': './results/xlnet-base-cased-SibylCollator-INVSIB', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6351,0.469769,0.901667,20.1043,59.689
10000,0.4019,0.364384,0.923333,19.6569,61.047
15000,0.3756,0.378757,0.908333,17.2029,69.756
20000,0.3575,0.41819,0.925833,17.277,69.456
25000,0.3399,0.456225,0.915,18.3216,65.497
30000,0.3607,0.382556,0.913333,18.9294,63.394
35000,0.3543,0.395394,0.9225,16.891,71.044
40000,0.4002,0.489775,0.9125,19.3905,61.886
45000,0.4237,0.448786,0.91,17.9678,66.786
50000,0.5511,0.654796,0.82,18.6935,64.193


ORIG for ./results/xlnet-base-cased-SibylCollator-TextMix
{'eval_loss': 0.3926885724067688, 'eval_accuracy': 0.9217105263157894, 'eval_runtime': 242.6244, 'eval_samples_per_second': 31.324, 'epoch': 3.54, 'run': './results/xlnet-base-cased-SibylCollator-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6515,0.437895,0.905,20.7553,57.817
10000,0.4077,0.433069,0.91,21.1433,56.756
15000,0.3715,0.434952,0.908333,19.2092,62.47
20000,0.3707,0.327831,0.926667,18.3821,65.281
25000,0.3524,0.330013,0.919167,18.719,64.106
30000,0.3609,0.424502,0.915833,19.5902,61.255
35000,0.3632,0.455167,0.9075,18.3337,65.453
40000,0.408,0.598535,0.8425,21.479,55.869
45000,0.4606,0.572554,0.893333,18.9919,63.185
50000,0.5481,0.531843,0.879167,20.9506,57.277


ORIG for ./results/xlnet-base-cased-SibylCollator-SentMix
{'eval_loss': 0.32802721858024597, 'eval_accuracy': 0.9264473684210527, 'eval_runtime': 246.4713, 'eval_samples_per_second': 30.835, 'epoch': 3.54, 'run': './results/xlnet-base-cased-SibylCollator-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


SibylCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6567,0.601203,0.861667,19.4313,61.756
10000,0.4457,0.449367,0.879167,19.5618,61.344
15000,0.4244,0.357637,0.918333,17.8112,67.373
20000,0.4016,0.354163,0.926667,17.347,69.176
25000,0.4018,0.4176,0.895,17.3964,68.98
30000,0.4105,0.502811,0.8875,18.8572,63.636
35000,0.4285,0.444306,0.905,16.856,71.191
40000,0.479,1.129131,0.724167,19.0362,63.038
45000,0.5208,0.739449,0.848333,17.8016,67.41
50000,0.6168,0.729014,0.844167,18.9602,63.29


ORIG for ./results/xlnet-base-cased-SibylCollator-WordMix
{'eval_loss': 0.33383435010910034, 'eval_accuracy': 0.9285526315789474, 'eval_runtime': 247.9231, 'eval_samples_per_second': 30.655, 'epoch': 3.54, 'run': './results/xlnet-base-cased-SibylCollator-WordMix', 'test': 'ORIG'}


In [6]:
# Gather the collected evaluation records into a single table
# (presumably one dict per run, like the ones printed above -- confirm
# against the cell that builds `results`).
df = pd.DataFrame(data=results)

In [7]:
# Persist the results table to disk (the row index is written as the first column).
df.to_csv(path_or_buf='train_AG_NEWS_SibylCollator.csv')

In [8]:
# Copy the results table to the system clipboard in Excel-friendly
# (tab-separated) form for pasting into a spreadsheet.
# This is a convenience step only: on a headless machine with no clipboard
# backend (e.g. no xclip/xsel/Qt on Linux) pandas raises PyperclipException,
# a RuntimeError subclass. That must not abort a Restart-and-Run-All, so the
# failure is reported and skipped instead of propagated.
try:
    df.to_clipboard(excel=True)
except RuntimeError as err:
    print(f'Skipping clipboard copy (no clipboard backend available): {err}')

In [10]:
# Show the results table as the cell's rich output.
df

Unnamed: 0,eval_loss,eval_accuracy,eval_runtime,eval_samples_per_second,epoch,run,test
0,0.27937,0.931316,113.758,66.808,3.54,./results/bert-base-uncased-SibylCollator-SIB,ORIG
1,0.293198,0.927763,112.877,67.33,3.28,./results/bert-base-uncased-SibylCollator-INVSIB,ORIG
2,0.325988,0.927895,113.6751,66.857,3.54,./results/bert-base-uncased-SibylCollator-TextMix,ORIG
3,0.325761,0.925,113.0304,67.239,3.28,./results/bert-base-uncased-SibylCollator-SentMix,ORIG
4,0.274907,0.926184,112.7674,67.395,3.28,./results/bert-base-uncased-SibylCollator-WordMix,ORIG
5,0.353707,0.922105,110.1731,68.982,3.28,./results/roberta-base-SibylCollator-SIB,ORIG
6,0.355798,0.928289,107.7642,70.524,4.8,./results/roberta-base-SibylCollator-INVSIB,ORIG
7,0.274713,0.931184,109.032,69.704,3.28,./results/roberta-base-SibylCollator-TextMix,ORIG
8,0.332986,0.921579,110.017,69.08,3.28,./results/roberta-base-SibylCollator-SentMix,ORIG
9,0.290559,0.927763,111.5945,68.104,3.54,./results/roberta-base-SibylCollator-WordMix,ORIG
