# Targeted SIB Training

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
from transformers import (
    AutoModelForSequenceClassification, 
    AutoTokenizer, 
    Trainer, 
    TrainingArguments, 
    TrainerCallback, 
    EarlyStoppingCallback
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers.trainer_callback import TrainerControl
from datasets import load_dataset
import torch
import pandas as pd
from torch.utils.data import DataLoader
from transforms import TextMix, SentMix, WordMix

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



In [3]:
def tokenize_fn(text):
    """Encode `text` with the module-level `tokenizer` and return PyTorch tensors.

    Sequences are padded, and truncated to at most 250 tokens.
    """
    encode_kwargs = dict(padding=True, truncation=True, max_length=250, return_tensors='pt')
    return tokenizer(text, **encode_kwargs)

def tokenize(batch):
    """Encode the 'text' entry of `batch` with the module-level `tokenizer`.

    Sequences are padded, and truncated to at most 250 tokens. Unlike
    `tokenize_fn`, no `return_tensors` is requested.
    """
    texts = batch['text']
    return tokenizer(texts, padding=True, truncation=True, max_length=250)

def acc_at_k(y_true, y_pred, k=2):
    """
    Target-weighted top-k agreement between (soft) labels and predicted scores.

    For every sample the top-k class indices of `y_true` and of `y_pred` are
    compared position by position; wherever they agree, the corresponding
    target weight is added to the score. The sum is divided by the number of
    samples.

    :param y_true: (batch, n_classes) target weights; tensor or anything
        `torch.as_tensor` accepts (nested list, numpy array, ...).
    :param y_pred: (batch, n_classes) predicted scores, same container rules.
    :param k: number of top entries to compare. It is clamped to the size of
        the class dimension, because `torch.topk` raises when asked for more
        entries than exist.
    :return: the score as a Python float.
    """
    # isinstance (rather than an exact type comparison) also accepts Tensor
    # subclasses, and as_tensor avoids copying data that is already array-like.
    y_true = y_true if isinstance(y_true, torch.Tensor) else torch.as_tensor(y_true)
    y_pred = y_pred if isinstance(y_pred, torch.Tensor) else torch.as_tensor(y_pred)
    total = len(y_true)
    k = min(k, y_true.shape[-1], y_pred.shape[-1])
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    _, out_idx = torch.topk(y_pred, k=k, dim=-1)
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) loss.

    Both inputs are flattened to (batch, -1); the per-sample loss is
    ``-sum(target * log_softmax(logits))`` over the flattened dimension.

    :param logits: (batch, *) unnormalised scores.
    :param target: (batch, *) same shape as logits; each item must be a valid
        distribution, i.e. target[i, :].sum() == 1.
    :param reduction: 'none' (per-sample losses), 'mean' or 'sum'.
    :raises NotImplementedError: for any other `reduction` value.
    """
    flat_logits = logits.view(logits.shape[0], -1)
    log_p = torch.nn.functional.log_softmax(flat_logits, dim=1)
    flat_target = target.view(target.shape[0], -1)
    per_sample = -(flat_target * log_p).sum(dim=1)

    if reduction == 'none':
        return per_sample
    if reduction == 'mean':
        return per_sample.mean()
    if reduction == 'sum':
        return per_sample.sum()
    raise NotImplementedError('Unsupported reduction mode.')
        
def compute_metrics(pred):
    """Return {'accuracy': ...} for a (predictions, labels) pair.

    Labels with more than one dimension are scored with `acc_at_k` (k=2);
    1-D labels are scored with plain accuracy against the argmax of the
    predictions.
    """
    preds, labels = pred
    has_multi_dim_labels = len(labels.shape) > 1
    if has_multi_dim_labels:
        score = acc_at_k(labels, preds, k=2)
    else:
        score = accuracy_score(labels, preds.argmax(-1))
    return {'accuracy': score}

class TargetedTrainer(Trainer):
    """Trainer whose loss handles both 1-D labels and multi-dimensional (soft) labels."""

    def compute_loss(self, model, inputs, return_outputs=False):
        """Compute the loss for one batch.

        "labels" is removed from `inputs` before the forward pass, so the
        model is called without labels and its first output is used as the
        logits. Labels with more than one dimension go through `CEwST_loss`;
        1-D labels go through the standard cross entropy.

        :return: `loss`, or `(loss, outputs)` when `return_outputs` is true.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        is_soft = len(labels.shape) > 1
        loss_fn = CEwST_loss if is_soft else torch.nn.functional.cross_entropy
        loss = loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss

class TargetedMixturesCallback(TrainerCallback):
    """
    A callback that calculates a confusion matrix on the validation
    data and returns the most confused class pairings.

    :param dataloader: iterable of batches exposing 'text' and 'label'; its
        `.dataset['label']` is used to infer the number of classes.
    :param device: device the tokenized inputs are moved to before the
        forward pass.
    """
    def __init__(self, dataloader, device):
        self.dataloader = dataloader
        self.device = device

    def on_evaluate(self, args, state, control, model, tokenizer, **kwargs):
        """Recompute the most-confused class pairs and attach them to `control`.

        The pairs are stored as `control.new_targets`.
        NOTE(review): `new_targets` is an ad-hoc attribute; whatever consumes
        it is not visible in this file.
        """
        cnf_mat = self.get_confusion_matrix(model, tokenizer, self.dataloader)
        new_targets = self.get_most_confused_per_class(cnf_mat)
        print("New targets:", new_targets)
        # Update the control *instance* that was passed in. Rebinding it to the
        # TrainerControl class would set flags on the class itself and hand the
        # class, not an instance, back to the caller.
        control.new_targets = new_targets
        # Only ever request a stop; never clear the flag, so a stop requested
        # by another callback (e.g. early stopping) is not overridden.
        if state.global_step >= state.max_steps:
            control.should_training_stop = True
        return control

    def get_confusion_matrix(self, model, tokenizer, dataloader, normalize=True):
        """Build an (n_classes, n_classes) matrix indexed [true label, predicted label].

        :param dataloader: batches to evaluate (this argument is iterated).
        :param normalize: when true, each column is divided by its total, i.e.
            counts are normalised per predicted class. Columns with no
            predictions are left at zero instead of becoming NaN.
        """
        n_classes = max(dataloader.dataset['label']) + 1
        confusion_matrix = torch.zeros(n_classes, n_classes)
        with torch.no_grad():
            for batch in dataloader:
                data, targets = batch['text'], batch['label']
                data = tokenizer(data, padding=True, truncation=True, max_length=250, return_tensors='pt')
                input_ids = data['input_ids'].to(self.device)
                attention_mask = data['attention_mask'].to(self.device)
                outputs = model(input_ids, attention_mask=attention_mask).logits
                preds = torch.argmax(outputs, dim=1).cpu().view(-1).long()
                # The matrix lives on the CPU, so the targets stay there too.
                targets = targets.cpu().view(-1).long()
                # accumulate=True adds one per (true, pred) pair, including
                # pairs that occur several times within the batch.
                confusion_matrix.index_put_(
                    (targets, preds),
                    torch.ones(targets.numel()),
                    accumulate=True,
                )
        if normalize:
            # Entries are counts, so a non-empty column sums to >= 1 and the
            # clamp only affects all-zero columns (avoiding 0/0 = NaN).
            column_totals = confusion_matrix.sum(dim=0).clamp(min=1)
            confusion_matrix = confusion_matrix / column_totals
        return confusion_matrix

    def get_most_confused_per_class(self, confusion_matrix):
        """Return `[class, most_confused_class]` pairs, one per row of the matrix.

        The diagonal is zeroed on a copy, so the caller's matrix is left
        untouched; the pairing is the arg-max of each row of that copy.
        """
        idx = torch.arange(len(confusion_matrix))
        off_diagonal = confusion_matrix.clone().fill_diagonal_(0)
        cnf = off_diagonal.max(dim=1)[1]
        return torch.stack((idx, cnf)).T.tolist()

class TargetedMixturesCollator:
    """
    Batch collator that optionally applies a mixing transform before tokenizing.

    :param tokenize_fn: callable mapping a list of texts to a dict-like batch.
    :param transform: callable invoked as
        `transform((texts, labels), target_pairs, target_prob, num_classes)`
        and returning a new `(texts, labels)` pair.
    :param transform_prob: probability that the transform is applied to a batch.
    :param target_pairs: class pairs forwarded to the transform. Defaults to a
        fresh empty list per instance.
    :param target_prob: forwarded to the transform unchanged.
    :param num_classes: number of classes; also used to one-hot encode 1-D labels.
    """
    def __init__(self,
                 tokenize_fn,
                 transform,
                 transform_prob=1.0,
                 target_pairs=None,
                 target_prob=1.0,
                 num_classes=2):

        self.tokenize_fn = tokenize_fn
        self.transform = transform
        self.transform_prob = transform_prob
        # A literal [] default would be one list shared by every instance
        # created without `target_pairs`; build a new list per instance instead.
        self.target_pairs = [] if target_pairs is None else target_pairs
        self.target_prob = target_prob
        self.num_classes = num_classes
        print("TargetedMixturesCollator initialized with {}".format(transform.__class__.__name__))

    def __call__(self, batch):
        """Collate a list of {'text': ..., 'label': ...} examples.

        :return: the output of `tokenize_fn` with a 'labels' entry added.
            1-D labels are one-hot encoded, so 'labels' is always at least 2-D.
        """
        text = [x['text'] for x in batch]
        labels = [x['label'] for x in batch]
        batch = (text, labels)
        # Apply the transform to this batch with probability `transform_prob`.
        if torch.rand(1) < self.transform_prob:
            batch = self.transform(
                batch,
                self.target_pairs,
                self.target_prob,
                self.num_classes
            )
        text, labels = batch
        labels = torch.tensor(labels)
        if len(labels.shape) == 1:
            labels = torch.nn.functional.one_hot(labels, num_classes=self.num_classes)
        batch = self.tokenize_fn(text)
        batch['labels'] = labels
        batch.pop('idx', None)
        return batch
    
class DefaultCollator:
    """Callable wrapper around PyTorch's default batch collation."""

    def __call__(self, batch):
        """Collate `batch` with `torch.utils.data.dataloader.default_collate`."""
        collate = torch.utils.data.dataloader.default_collate
        return collate(batch)

In [4]:
# Pretrained checkpoints to fine-tune.
MODEL_NAMES = [
    'bert-base-uncased',
    'roberta-base',
    'xlnet-base-cased',
]
# One instance of each mixing transform imported from the local `transforms` module.
ts = [mix_cls() for mix_cls in (TextMix, SentMix, WordMix)]

In [5]:
results = []

# ---------------------------------------------------------------------------
# Loop-invariant setup: data and hyper-parameters are identical for every
# (model, transform) run, so they are prepared once.
# ---------------------------------------------------------------------------
# Fixed seed so every run is trained and validated on the same split; without
# it each run draws a different random split and the runs are not comparable.
SPLIT_SEED = 42

train_batch_size = 8
eval_batch_size = 32
num_epoch = 20
gradient_accumulation_steps = 1

# First 90% of the GLUE SST-2 train split -> train/eval (95% / 5%).
dataset = load_dataset('glue', 'sst2', split='train[:90%]')
dataset.rename_column_('sentence', 'text')
dataset_dict = dataset.train_test_split(
    test_size=0.05,
    train_size=0.95,
    shuffle=True,
    seed=SPLIT_SEED
)
train_dataset = dataset_dict['train']
eval_dataset = dataset_dict['test']

# Last 10% of the train split is held out as the test set. It is tokenized
# inside the loop because tokenization depends on the model's tokenizer.
test_dataset_raw = load_dataset('glue', 'sst2', split='train[90%:]')
test_dataset_raw.rename_column_('sentence', 'text')
test_dataset_raw.rename_column_('label', 'labels')

max_steps = int((len(train_dataset) * num_epoch / gradient_accumulation_steps) / train_batch_size)

for MODEL_NAME in MODEL_NAMES:
    for t in ts:

        t_str = t.__class__.__name__
        checkpoint = './results/' + MODEL_NAME + '-targeted-' + t_str

        # `tokenizer` must be a module-level name: tokenize() and tokenize_fn()
        # read it as a global.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2).to(device)

        # Tokenize the whole test set in a single batch with this model's tokenizer.
        test_dataset = test_dataset_raw.map(tokenize, batched=True, batch_size=len(test_dataset_raw))
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])

        # NOTE: to retarget the mixtures from validation confusions, build a
        # TargetedMixturesCallback(dataloader=DataLoader(eval_dataset, batch_size=32),
        # device=device), put it before `escb` in `callbacks`, and call
        # trainer.remove_callback(...) on it before the final evaluation below.
        escb = EarlyStoppingCallback(
            early_stopping_patience=10
        )
        tmc = TargetedMixturesCollator(
            tokenize_fn=tokenize_fn,
            transform=t,
            transform_prob=0.5,
            target_pairs=[(0,1),(1,0)],
            target_prob=0.5,
            num_classes=2
        )

        training_args = TrainingArguments(
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=2000,
            logging_first_step=True,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            greater_is_better=True,
            evaluation_strategy="steps",
            # The collator needs the raw 'text'/'label' columns, so keep them.
            remove_unused_columns=False
        )

        trainer = TargetedTrainer(
            model=model,
            tokenizer=tokenizer,
            args=training_args,
            compute_metrics=compute_metrics,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            data_collator=tmc,
            callbacks=[escb]
        )

        trainer.train()

        # Test with ORIG data: the test set is already tokenized tensors, so
        # swap the mixing collator for plain default collation.
        trainer.eval_dataset = test_dataset
        trainer.data_collator = DefaultCollator()

        out_orig = trainer.evaluate()
        out_orig['run'] = checkpoint
        out_orig['test'] = "ORIG"
        print('ORIG for {}\n{}'.format(checkpoint, out_orig))

        results.append(out_orig)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.558,0.473142,0.804355,8.185,370.313
4000,0.3741,0.356218,0.891455,8.4361,359.29
6000,0.3537,0.343995,0.91125,8.5086,356.227
8000,0.3207,0.317531,0.917519,8.1665,371.152
10000,0.3258,0.344437,0.897064,8.7176,347.688
12000,0.3233,0.331945,0.91488,8.1583,371.525
14000,0.3318,0.303418,0.921808,8.5749,353.473
16000,0.2994,0.350693,0.919169,8.7021,348.306
18000,0.2942,0.310931,0.915209,8.5309,355.295
20000,0.2877,0.316047,0.922468,7.9914,379.285


ORIG for ./results/bert-base-uncased-targeted-TextMix
{'eval_loss': 0.3205556571483612, 'eval_accuracy': 0.9478841870824053, 'eval_runtime': 21.8997, 'eval_samples_per_second': 307.539, 'epoch': 15.28, 'run': './results/bert-base-uncased-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.5442,0.421433,0.832728,8.4057,360.591
4000,0.3735,0.340744,0.875289,8.5965,352.587
6000,0.3463,0.318136,0.893105,8.3636,362.403
8000,0.3234,0.332224,0.908941,8.3318,363.785
10000,0.3148,0.328997,0.895084,8.8623,342.011
12000,0.3275,0.335676,0.91257,8.4917,356.939
14000,0.3258,0.337332,0.915209,8.5963,352.592
16000,0.3116,0.34367,0.910591,8.4533,358.56
18000,0.3053,0.356221,0.91125,8.3779,361.783
20000,0.2957,0.349277,0.91191,8.2876,365.726


ORIG for ./results/bert-base-uncased-targeted-SentMix
{'eval_loss': 0.2529617249965668, 'eval_accuracy': 0.948626577579807, 'eval_runtime': 21.7917, 'eval_samples_per_second': 309.063, 'epoch': 7.78, 'run': './results/bert-base-uncased-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.5686,0.478888,0.779941,8.4565,358.424
4000,0.4158,0.371759,0.831739,8.3645,362.364
6000,0.3898,0.395251,0.82547,8.4373,359.24
8000,0.3834,0.376948,0.839327,8.3918,361.184
10000,0.3593,0.475109,0.846915,8.6951,348.587
12000,0.3606,0.352527,0.866711,8.3019,365.095
14000,0.3652,0.436052,0.86869,8.5464,354.653
16000,0.3359,0.366088,0.848895,8.6985,348.45
18000,0.333,0.404838,0.885516,8.3165,364.456
20000,0.3457,0.386097,0.880567,8.4569,358.405


ORIG for ./results/bert-base-uncased-targeted-WordMix
{'eval_loss': 0.2585805356502533, 'eval_accuracy': 0.9328878990348923, 'eval_runtime': 22.0245, 'eval_samples_per_second': 305.795, 'epoch': 8.89, 'run': './results/bert-base-uncased-targeted-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.5361,0.454626,0.859452,8.25,367.393
4000,0.4052,0.418405,0.874959,8.331,363.823
6000,0.3906,0.3834,0.906631,8.3892,361.3
8000,0.3661,0.446195,0.895744,8.108,373.829
10000,0.3572,0.364362,0.905312,8.5539,354.343
12000,0.3635,0.395606,0.909271,7.9171,382.843
14000,0.3682,0.391262,0.887166,8.3363,363.589
16000,0.3554,0.392536,0.906961,8.4212,359.924
18000,0.3569,0.437222,0.889146,8.1821,370.444
20000,0.3498,0.38464,0.894424,7.9551,381.012


ORIG for ./results/roberta-base-targeted-TextMix
{'eval_loss': 0.28808724880218506, 'eval_accuracy': 0.9260579064587974, 'eval_runtime': 20.8713, 'eval_samples_per_second': 322.692, 'epoch': 6.67, 'run': './results/roberta-base-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.5396,0.440461,0.830419,8.1215,373.207
4000,0.3937,0.369077,0.91224,8.2914,365.561
6000,0.3912,0.320892,0.924777,8.3995,360.856
8000,0.3707,0.339751,0.918839,8.2938,365.455
10000,0.3638,0.389388,0.9129,8.5985,352.503
12000,0.3731,0.33492,0.920488,8.3074,364.857
14000,0.3623,0.326016,0.904652,8.2068,369.329
16000,0.3588,0.373387,0.878588,8.3736,361.969
18000,0.347,0.337807,0.908941,8.2457,367.586
20000,0.3402,0.339651,0.916199,8.3489,363.042


ORIG for ./results/roberta-base-targeted-SentMix
{'eval_loss': 0.2426948845386505, 'eval_accuracy': 0.9312546399406088, 'eval_runtime': 21.06, 'eval_samples_per_second': 319.8, 'epoch': 3.61, 'run': './results/roberta-base-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.5714,0.488562,0.750907,8.4191,360.014
4000,0.4557,0.409672,0.847575,8.2624,366.843
6000,0.4459,0.40555,0.839987,8.3572,362.679
8000,0.4222,0.407799,0.861432,8.3419,363.345
10000,0.4192,0.494683,0.803365,8.4732,357.715
12000,0.413,0.354543,0.857803,8.17,370.992
14000,0.4461,0.569196,0.820191,8.4663,358.009
16000,0.4717,0.485788,0.759815,8.5657,353.852
18000,0.437,0.442415,0.835038,8.3766,361.84
20000,0.457,0.415584,0.841966,8.4218,359.901


ORIG for ./results/roberta-base-targeted-WordMix
{'eval_loss': 0.3388954699039459, 'eval_accuracy': 0.9171492204899777, 'eval_runtime': 21.1711, 'eval_samples_per_second': 318.122, 'epoch': 3.89, 'run': './results/roberta-base-targeted-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.5905,0.457489,0.82481,12.8672,235.56
4000,0.3997,0.428434,0.86935,13.636,222.279
6000,0.39,0.420024,0.882547,12.8176,236.472
8000,0.3644,0.369465,0.899703,12.792,236.944
10000,0.3518,0.406631,0.900363,13.5966,222.923
12000,0.357,0.340363,0.91356,12.483,242.811
14000,0.3647,0.346046,0.900363,13.3977,226.232
16000,0.3496,0.395284,0.888816,13.3972,226.242
18000,0.3364,0.404085,0.881887,12.8938,235.074
20000,0.3403,0.380088,0.898053,12.8115,236.585


ORIG for ./results/xlnet-base-cased-targeted-TextMix
{'eval_loss': 0.28366708755493164, 'eval_accuracy': 0.9250185597624351, 'eval_runtime': 33.0223, 'eval_samples_per_second': 203.953, 'epoch': 4.45, 'run': './results/xlnet-base-cased-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.5816,0.443552,0.821511,13.3144,227.647
4000,0.4089,0.47193,0.865721,13.2974,227.939
6000,0.389,0.401099,0.880238,13.4558,225.256
8000,0.3555,0.391989,0.897394,12.8726,235.461
10000,0.3749,0.385877,0.900033,13.7331,220.708
12000,0.3539,0.373992,0.903332,12.593,240.689
14000,0.3647,0.491413,0.875619,13.2824,228.197
16000,0.3583,0.444837,0.875289,13.5011,224.5
18000,0.3602,0.411619,0.883207,13.2631,228.528
20000,0.3532,0.393918,0.875948,12.462,243.219


ORIG for ./results/xlnet-base-cased-targeted-SentMix
{'eval_loss': 0.2852342128753662, 'eval_accuracy': 0.9475872308834447, 'eval_runtime': 33.0679, 'eval_samples_per_second': 203.672, 'epoch': 16.12, 'run': './results/xlnet-base-cased-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6012,0.517693,0.787859,12.8634,235.63
4000,0.4407,0.418483,0.823491,13.102,231.339
6000,0.433,0.478009,0.835368,13.1232,230.965
8000,0.4137,0.447416,0.849225,12.6623,239.372
10000,0.4079,0.458578,0.837018,13.4309,225.673
12000,0.4157,0.357776,0.857143,12.9008,234.946
14000,0.3981,0.400482,0.860772,12.7126,238.425
16000,0.397,0.443094,0.843946,13.2186,229.299
18000,0.43,0.439822,0.833058,12.7975,236.843
20000,0.4907,0.491829,0.816562,13.0118,232.942


ORIG for ./results/xlnet-base-cased-targeted-WordMix
{'eval_loss': 0.30975812673568726, 'eval_accuracy': 0.9198218262806236, 'eval_runtime': 32.9464, 'eval_samples_per_second': 204.423, 'epoch': 4.72, 'run': './results/xlnet-base-cased-targeted-WordMix', 'test': 'ORIG'}


In [6]:
# Collect the per-run evaluation dicts into one table (one row per entry of `results`).
df = pd.DataFrame(results)
df

Unnamed: 0,eval_loss,eval_accuracy,eval_runtime,eval_samples_per_second,epoch,run,test
0,0.320556,0.947884,21.8997,307.539,15.28,./results/bert-base-uncased-targeted-TextMix,ORIG
1,0.252962,0.948627,21.7917,309.063,7.78,./results/bert-base-uncased-targeted-SentMix,ORIG
2,0.258581,0.932888,22.0245,305.795,8.89,./results/bert-base-uncased-targeted-WordMix,ORIG
3,0.288087,0.926058,20.8713,322.692,6.67,./results/roberta-base-targeted-TextMix,ORIG
4,0.242695,0.931255,21.06,319.8,3.61,./results/roberta-base-targeted-SentMix,ORIG
5,0.338895,0.917149,21.1711,318.122,3.89,./results/roberta-base-targeted-WordMix,ORIG
6,0.283667,0.925019,33.0223,203.953,4.45,./results/xlnet-base-cased-targeted-TextMix,ORIG
7,0.285234,0.947587,33.0679,203.672,16.12,./results/xlnet-base-cased-targeted-SentMix,ORIG
8,0.309758,0.919822,32.9464,204.423,4.72,./results/xlnet-base-cased-targeted-WordMix,ORIG


In [7]:
# Persist the results table to a CSV file in the current working directory.
df.to_csv('train_SST2_targeted_r3.csv')

In [8]:
# Copy the table to the system clipboard in Excel-pasteable form.
# NOTE(review): needs a clipboard backend, so this presumably fails on headless machines; confirm before Run All.
df.to_clipboard(excel=True)