# Targeted SIB Training

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
from transformers import (
    AutoModelForSequenceClassification, 
    AutoTokenizer, 
    Trainer, 
    TrainingArguments, 
    TrainerCallback, 
    EarlyStoppingCallback
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers.trainer_callback import TrainerControl
from datasets import load_dataset
import torch
import pandas as pd
from torch.utils.data import DataLoader
from transforms import TextMix, SentMix, WordMix

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
def tokenize_fn(text):
    return tokenizer(text, padding=True, truncation=True, max_length=250, return_tensors='pt')

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=250)

def acc_at_k(y_true, y_pred, k=2):
    y_true = torch.tensor(y_true) if type(y_true) != torch.Tensor else y_true
    y_pred = torch.tensor(y_pred) if type(y_pred) != torch.Tensor else y_pred
    total = len(y_true)
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    out_weights, out_idx = torch.topk(y_pred, k=k, dim=-1)
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) Loss
    :param logits: (batch, *)
    :param target: (batch, *) same shape as logits, each item must be a valid distribution: target[i, :].sum() == 1.
    """
    logprobs = torch.nn.functional.log_softmax(logits.view(logits.shape[0], -1), dim=1)
    batchloss = - torch.sum(target.view(target.shape[0], -1) * logprobs, dim=1)
    if reduction == 'none':
        return batchloss
    elif reduction == 'mean':
        return torch.mean(batchloss)
    elif reduction == 'sum':
        return torch.sum(batchloss)
    else:
        raise NotImplementedError('Unsupported reduction mode.')

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=None)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1.mean(),
        'precision': precision.mean(),
        'recall': recall.mean()
    }        
        
def compute_metrics_w_soft_target(pred):
    labels = pred.label_ids
    preds = pred.predictions
    acc = acc_at_k(labels, preds, k=2)
    return {
        'accuracy': acc,
    }

class TargetedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        loss = CEwST_loss(logits, labels)
        if return_outputs:
            return loss, outputs
        return loss

class TargetedMixturesCallback(TrainerCallback):
    """
    A callback that calculates a confusion matrix on the validation
    data and returns the most confused class pairings.
    """
    def __init__(self, dataloader, device):
        self.dataloader = dataloader
        self.device = device
        
    def on_evaluate(self, args, state, control, model, tokenizer, **kwargs):
        cnf_mat = self.get_confusion_matrix(model, tokenizer, self.dataloader)
        new_targets = self.get_most_confused_per_class(cnf_mat)
        print("New targets:", new_targets)
        control = TrainerControl
        control.new_targets = new_targets
        if state.global_step < state.max_steps:
            control.should_training_stop = False
        else:
            control.should_training_stop = True
        return control
        
    def get_confusion_matrix(self, model, tokenizer, dataloader, normalize=True):
        n_classes = max(dataloader.dataset['label']) + 1
        confusion_matrix = torch.zeros(n_classes, n_classes)
        with torch.no_grad():
            for batch in iter(self.dataloader):
                data, targets = batch['text'], batch['label']
                data = tokenizer(data, padding=True, truncation=True, max_length=250, return_tensors='pt')
                input_ids = data['input_ids'].to(self.device)
                attention_mask = data['attention_mask'].to(self.device)
                targets = targets.to(self.device)
                outputs = model(input_ids, attention_mask=attention_mask).logits
                preds = torch.argmax(outputs, dim=1).cpu()
                for t, p in zip(targets.view(-1), preds.view(-1)):
                    confusion_matrix[t.long(), p.long()] += 1    
            if normalize:
                confusion_matrix = confusion_matrix / confusion_matrix.sum(dim=0)
        return confusion_matrix

    def get_most_confused_per_class(self, confusion_matrix):
        idx = torch.arange(len(confusion_matrix))
        cnf = confusion_matrix.fill_diagonal_(0).max(dim=1)[1]
        return torch.stack((idx, cnf)).T.tolist()

class TargetedMixturesCollator:
    def __init__(self, tokenize_fn, transform, target_pairs=[], target_prob=1.0, num_classes=2):
        self.tokenize_fn = tokenize_fn
        self.transform = transform
        self.target_pairs = target_pairs
        self.target_prob = target_prob
        self.num_classes = num_classes
        print("TargetedMixturesCollator initialized with {}".format(transform.__class__.__name__))
        
    def __call__(self, batch):
        text = [x['text'] for x in batch]
        labels = [x['label'] for x in batch]
        batch = (text, labels)
        batch = self.transform(
            batch, 
            self.target_pairs,   
            self.target_prob,
            self.num_classes
        )
        text, labels = batch
        batch = self.tokenize_fn(text)
        batch['labels'] = torch.tensor(labels)
        return batch
    
class DefaultCollator:
    def __init__(self):
        pass
    def __call__(self, batch):
        return torch.utils.data.dataloader.default_collate(batch)

In [4]:
MODEL_NAMES = ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']
ts = [TextMix(), SentMix(), WordMix()]

In [5]:
results = []

for MODEL_NAME in MODEL_NAMES:
    for t in ts:  
    
        t_str = t.__class__.__name__
        checkpoint = './results/' + MODEL_NAME + '-targeted-' + t_str
        
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2).to(device)

        dataset = load_dataset('glue', 'sst2', split='train[:90%]') 
        dataset.rename_column_('sentence', 'text')
        dataset_dict = dataset.train_test_split(
            test_size = 0.05,
            train_size = 0.95,
            shuffle = True
        )
        train_dataset = dataset_dict['train']
        eval_dataset = dataset_dict['test']

        test_dataset = load_dataset('glue', 'sst2', split='train[90%:]')
        test_dataset.rename_column_('sentence', 'text') 
        test_dataset.rename_column_('label', 'labels')
        test_dataset = test_dataset.map(tokenize, batched=True, batch_size=len(test_dataset))
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        
        train_batch_size = 8
        eval_batch_size = 32
        num_epoch = 20
        gradient_accumulation_steps = 1
        max_steps = int((len(train_dataset) * num_epoch / gradient_accumulation_steps) / train_batch_size)

#         tmcb = TargetedMixturesCallback(
#             dataloader=DataLoader(eval_dataset, batch_size=32),
#             device=device
#         )
        escb = EarlyStoppingCallback(
            early_stopping_patience=10
        )
        tmc = TargetedMixturesCollator(
            tokenize_fn=tokenize_fn, 
            transform=t,
            target_pairs=[(0,1),(1,0)],
            target_prob=0.5,
            num_classes=2
        )

        training_args = TrainingArguments(
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps, 
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=2000,
            logging_first_step=True,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            greater_is_better=True,
            evaluation_strategy="steps",
            remove_unused_columns=False
        )

        trainer = TargetedTrainer(
            model=model, 
            tokenizer=tokenizer,
            args=training_args,
            compute_metrics=compute_metrics_w_soft_target,                  
            train_dataset=train_dataset,         
            eval_dataset=eval_dataset,
            data_collator=tmc,
            callbacks=[escb] # [tmcb, escb]
        )

        trainer.train()

        # test with ORIG data
        trainer.eval_dataset = test_dataset
        trainer.compute_metrics = compute_metrics
        trainer.data_collator = DefaultCollator()
        # trainer.remove_callback(tmcb)

        out_orig = trainer.evaluate()
        out_orig['run'] = checkpoint
        out_orig['test'] = "ORIG"
        print('ORIG for {}\n{}'.format(checkpoint, out_orig))

        results.append(out_orig)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with TextMix


W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6446,0.578551,0.765754,7.6857,394.368
4000,0.4518,0.450146,0.842296,7.7153,392.854
6000,0.424,0.454919,0.888816,7.7774,389.717
8000,0.4035,0.502953,0.872649,7.4941,404.45
10000,0.3914,0.415215,0.901023,7.6018,398.72
12000,0.4051,0.425973,0.894754,7.6211,397.713
14000,0.4052,0.398105,0.903992,7.5807,399.833
16000,0.3848,0.425721,0.905312,7.7443,391.387
18000,0.3727,0.421396,0.909601,7.4247,408.231
20000,0.3786,0.39958,0.892445,7.6856,394.376


early_stopping_patience_counter


early_stopping_patience_counter
ORIG for ./results/bert-base-uncased-targeted-TextMix
{'eval_loss': 3.8884871006011963, 'eval_accuracy': 0.9392724573125464, 'eval_f1': 0.9382225675862759, 'eval_precision': 0.9393564136059972, 'eval_recall': 0.9372503844920124, 'eval_runtime': 17.6167, 'eval_samples_per_second': 382.307, 'epoch': 17.78, 'run': './results/bert-base-uncased-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6388,0.581104,0.774662,7.672,395.073
4000,0.4496,0.430262,0.830419,7.8288,387.161
6000,0.4247,0.428839,0.874299,7.961,380.732
8000,0.3987,0.446683,0.888816,7.7776,389.707
10000,0.393,0.439805,0.872319,7.7836,389.409
12000,0.4048,0.404043,0.895084,7.6718,395.081
14000,0.4021,0.39763,0.872649,7.7187,392.681
16000,0.3908,0.413653,0.906632,7.9687,380.362
18000,0.3906,0.406766,0.904652,7.7715,390.015
20000,0.3842,0.388488,0.902672,7.9251,382.455


early_stopping_patience_counter


early_stopping_patience_counter
ORIG for ./results/bert-base-uncased-targeted-SentMix
{'eval_loss': 3.712137460708618, 'eval_accuracy': 0.9376391982182628, 'eval_f1': 0.9366704266945021, 'eval_precision': 0.9369365518694865, 'eval_recall': 0.9364153494652911, 'eval_runtime': 16.9257, 'eval_samples_per_second': 397.916, 'epoch': 15.84, 'run': './results/bert-base-uncased-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6739,0.631729,0.65325,7.7312,392.049
4000,0.5179,0.535726,0.769053,7.8257,387.316
6000,0.4898,0.481833,0.791818,7.7048,393.392
8000,0.4685,0.507732,0.788849,7.756,390.796
10000,0.4559,0.477258,0.806005,7.7266,392.281
12000,0.4553,0.46445,0.815902,7.821,387.546
14000,0.4572,0.47752,0.808974,7.9448,381.509
16000,0.432,0.476921,0.817882,7.6962,393.829
18000,0.4245,0.482498,0.818542,7.7189,392.671
20000,0.4243,0.467404,0.816232,7.7283,392.196


early_stopping_patience_counter


ORIG for ./results/bert-base-uncased-targeted-WordMix
{'eval_loss': 2.8262295722961426, 'eval_accuracy': 0.9373422420193022, 'eval_f1': 0.9362260902532189, 'eval_precision': 0.9376362285894349, 'eval_recall': 0.9350536964217183, 'eval_runtime': 16.9506, 'eval_samples_per_second': 397.331, 'epoch': 15.84, 'run': './results/bert-base-uncased-targeted-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6158,0.595183,0.786869,7.606,398.504
4000,0.4482,0.472041,0.858463,7.6453,396.452
6000,0.4368,0.413785,0.909931,7.7431,391.444
8000,0.42,0.451109,0.898053,7.7734,389.917
10000,0.422,0.433989,0.898713,7.778,389.691
12000,0.4262,0.450637,0.892115,7.645,396.47
14000,0.4272,0.473055,0.878258,7.5447,401.739
16000,0.4145,0.480303,0.893764,7.6234,397.592
18000,0.4113,0.446567,0.878258,7.7349,391.861
20000,0.4034,0.417151,0.884527,7.6036,398.628


early_stopping_patience_counter


ORIG for ./results/roberta-base-targeted-TextMix
{'eval_loss': 2.9884274005889893, 'eval_accuracy': 0.9435783221974758, 'eval_f1': 0.9427891787608251, 'eval_precision': 0.9423966499162479, 'eval_recall': 0.9432109504327504, 'eval_runtime': 16.545, 'eval_samples_per_second': 407.072, 'epoch': 15.84, 'run': './results/roberta-base-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6239,0.564919,0.844606,7.7026,393.502
4000,0.4319,0.398181,0.903002,7.7767,389.753
6000,0.4381,0.454227,0.900033,7.7173,392.752
8000,0.424,0.503579,0.893764,7.8331,386.948
10000,0.4205,0.403762,0.908281,7.8348,386.862
12000,0.4185,0.415584,0.916529,7.9666,380.462
14000,0.4275,0.434807,0.885516,7.6665,395.358
16000,0.4109,0.447512,0.905312,7.7954,388.817
18000,0.4061,0.450322,0.885516,7.6573,395.831
20000,0.4111,0.434285,0.916859,7.5852,399.594


early_stopping_patience_counter


early_stopping_patience_counter
ORIG for ./results/roberta-base-targeted-SentMix
{'eval_loss': 2.7558422088623047, 'eval_accuracy': 0.9429844097995546, 'eval_f1': 0.9421787597086402, 'eval_precision': 0.9418434046063713, 'eval_recall': 0.9425350464111215, 'eval_runtime': 16.5536, 'eval_samples_per_second': 406.861, 'epoch': 15.56, 'run': './results/roberta-base-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6599,0.622896,0.69548,7.861,385.573
4000,0.5258,0.552885,0.773012,7.7474,391.228
6000,0.501,0.512739,0.749588,7.7251,392.357
8000,0.4978,0.493153,0.797426,7.9004,383.652
10000,0.49,0.479546,0.801056,7.9235,382.535
12000,0.4917,0.547117,0.786869,7.7314,392.035
14000,0.4863,0.482812,0.804685,7.9418,381.65
16000,0.4809,0.523391,0.793797,7.8215,387.522
18000,0.4735,0.49572,0.812603,7.9459,381.455
20000,0.4634,0.482666,0.78324,8.0182,378.015


early_stopping_patience_counter


ORIG for ./results/roberta-base-targeted-WordMix
{'eval_loss': 1.4908337593078613, 'eval_accuracy': 0.880920564216778, 'eval_f1': 0.8758818492505398, 'eval_precision': 0.8951886823536714, 'eval_recall': 0.8692707020666626, 'eval_runtime': 16.5048, 'eval_samples_per_second': 408.064, 'epoch': 7.22, 'run': './results/roberta-base-targeted-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6461,0.594827,0.78357,12.2443,247.544
4000,0.4549,0.436235,0.863411,12.5116,242.256
6000,0.4306,0.456206,0.8258,12.5465,241.582
8000,0.4189,0.454064,0.877928,12.8471,235.929
10000,0.4125,0.429549,0.867371,12.7749,237.263
12000,0.421,0.456181,0.851204,12.7801,237.165
14000,0.4153,0.427129,0.866051,12.5695,241.14
16000,0.4079,0.430067,0.888156,12.6369,239.852
18000,0.3923,0.42795,0.887826,12.6131,240.305
20000,0.3956,0.443465,0.883867,12.2044,248.353


early_stopping_patience_counter


ORIG for ./results/xlnet-base-cased-targeted-TextMix
{'eval_loss': 2.389803647994995, 'eval_accuracy': 0.9273942093541203, 'eval_f1': 0.9260501016250737, 'eval_precision': 0.927810066510642, 'eval_recall': 0.9246463228386823, 'eval_runtime': 26.3955, 'eval_samples_per_second': 255.158, 'epoch': 6.67, 'run': './results/xlnet-base-cased-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6476,0.613538,0.778951,13.3334,227.324
4000,0.4611,0.495636,0.847245,13.0092,232.988
6000,0.4321,0.43298,0.880238,13.2875,228.109
8000,0.4204,0.431587,0.877928,12.9135,234.716
10000,0.4182,0.414039,0.892115,13.0292,232.632
12000,0.4194,0.47987,0.876278,13.3034,227.836
14000,0.4152,0.434126,0.867041,13.3393,227.222
16000,0.42,0.427618,0.875619,13.3473,227.087
18000,0.4008,0.47078,0.866711,13.7293,220.768
20000,0.4065,0.444799,0.885516,13.2037,229.557


early_stopping_patience_counter


ORIG for ./results/xlnet-base-cased-targeted-SentMix
{'eval_loss': 2.586996555328369, 'eval_accuracy': 0.9361544172234595, 'eval_f1': 0.9352841806698615, 'eval_precision': 0.9347649379982681, 'eval_recall': 0.9358589726974871, 'eval_runtime': 26.3615, 'eval_samples_per_second': 255.486, 'epoch': 9.17, 'run': './results/xlnet-base-cased-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
2000,0.6726,0.63621,0.677004,12.9883,233.364
4000,0.5287,0.59385,0.751237,12.5056,242.372
6000,0.505,0.545183,0.771033,12.4727,243.011
8000,0.4999,0.602829,0.744309,12.3669,245.089
10000,0.4845,0.498178,0.797097,12.4607,243.245
12000,0.4773,0.498005,0.774662,12.3719,244.99
14000,0.489,0.709821,0.698449,12.9075,234.825
16000,0.5906,0.700436,0.549984,12.6981,238.698
18000,0.6988,0.687496,0.548994,12.8317,236.212
20000,0.6987,0.689743,0.551633,12.7988,236.819


early_stopping_patience_counter


ORIG for ./results/xlnet-base-cased-targeted-WordMix
{'eval_loss': 1.8109349012374878, 'eval_accuracy': 0.8974016332590943, 'eval_f1': 0.8953633509736856, 'eval_precision': 0.8977366081715648, 'eval_recall': 0.8936208335958666, 'eval_runtime': 26.3755, 'eval_samples_per_second': 255.351, 'epoch': 4.17, 'run': './results/xlnet-base-cased-targeted-WordMix', 'test': 'ORIG'}


In [6]:
df = pd.DataFrame(results)
df

Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,epoch,run,test
0,3.888487,0.939272,0.938223,0.939356,0.93725,17.6167,382.307,17.78,./results/bert-base-uncased-targeted-TextMix,ORIG
1,3.712137,0.937639,0.93667,0.936937,0.936415,16.9257,397.916,15.84,./results/bert-base-uncased-targeted-SentMix,ORIG
2,2.82623,0.937342,0.936226,0.937636,0.935054,16.9506,397.331,15.84,./results/bert-base-uncased-targeted-WordMix,ORIG
3,2.988427,0.943578,0.942789,0.942397,0.943211,16.545,407.072,15.84,./results/roberta-base-targeted-TextMix,ORIG
4,2.755842,0.942984,0.942179,0.941843,0.942535,16.5536,406.861,15.56,./results/roberta-base-targeted-SentMix,ORIG
5,1.490834,0.880921,0.875882,0.895189,0.869271,16.5048,408.064,7.22,./results/roberta-base-targeted-WordMix,ORIG
6,2.389804,0.927394,0.92605,0.92781,0.924646,26.3955,255.158,6.67,./results/xlnet-base-cased-targeted-TextMix,ORIG
7,2.586997,0.936154,0.935284,0.934765,0.935859,26.3615,255.486,9.17,./results/xlnet-base-cased-targeted-SentMix,ORIG
8,1.810935,0.897402,0.895363,0.897737,0.893621,26.3755,255.351,4.17,./results/xlnet-base-cased-targeted-WordMix,ORIG


In [7]:
df.to_csv('train_SST2_targeted_r1.csv')

In [8]:
df.to_clipboard(excel=True)