# Targeted SIB Training

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
from transformers import (
    AutoModelForSequenceClassification, 
    AutoTokenizer, 
    Trainer, 
    TrainingArguments, 
    TrainerCallback, 
    EarlyStoppingCallback
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers.trainer_callback import TrainerControl
from datasets import load_dataset
import torch
import pandas as pd
from torch.utils.data import DataLoader
from transforms import TextMix, SentMix, WordMix

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
def tokenize_fn(text):
    return tokenizer(text, padding=True, truncation=True, max_length=250, return_tensors='pt')

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=250)

def acc_at_k(y_true, y_pred, k=2):
    y_true = torch.tensor(y_true) if type(y_true) != torch.Tensor else y_true
    y_pred = torch.tensor(y_pred) if type(y_pred) != torch.Tensor else y_pred
    total = len(y_true)
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    out_weights, out_idx = torch.topk(y_pred, k=k, dim=-1)
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) Loss
    :param logits: (batch, *)
    :param target: (batch, *) same shape as logits, each item must be a valid distribution: target[i, :].sum() == 1.
    """
    logprobs = torch.nn.functional.log_softmax(logits.view(logits.shape[0], -1), dim=1)
    batchloss = - torch.sum(target.view(target.shape[0], -1) * logprobs, dim=1)
    if reduction == 'none':
        return batchloss
    elif reduction == 'mean':
        return torch.mean(batchloss)
    elif reduction == 'sum':
        return torch.sum(batchloss)
    else:
        raise NotImplementedError('Unsupported reduction mode.')

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=None)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1.mean(),
        'precision': precision.mean(),
        'recall': recall.mean()
    }        
        
def compute_metrics_w_soft_target(pred):
    labels = pred.label_ids
    preds = pred.predictions
    acc = acc_at_k(labels, preds, k=2)
    return {
        'accuracy': acc,
    }

class TargetedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        loss = CEwST_loss(logits, labels)
        if return_outputs:
            return loss, outputs
        return loss

class TargetedMixturesCallback(TrainerCallback):
    """
    A callback that calculates a confusion matrix on the validation
    data and returns the most confused class pairings.
    """
    def __init__(self, dataloader, device):
        self.dataloader = dataloader
        self.device = device
        
    def on_evaluate(self, args, state, control, model, tokenizer, **kwargs):
        cnf_mat = self.get_confusion_matrix(model, tokenizer, self.dataloader)
        new_targets = self.get_most_confused_per_class(cnf_mat)
        print("New targets:", new_targets)
        control = TrainerControl
        control.new_targets = new_targets
        if state.global_step < state.max_steps:
            control.should_training_stop = False
        else:
            control.should_training_stop = True
        return control
        
    def get_confusion_matrix(self, model, tokenizer, dataloader, normalize=True):
        n_classes = max(dataloader.dataset['label']) + 1
        confusion_matrix = torch.zeros(n_classes, n_classes)
        with torch.no_grad():
            for batch in iter(self.dataloader):
                data, targets = batch['text'], batch['label']
                data = tokenizer(data, padding=True, truncation=True, max_length=250, return_tensors='pt')
                input_ids = data['input_ids'].to(self.device)
                attention_mask = data['attention_mask'].to(self.device)
                targets = targets.to(self.device)
                outputs = model(input_ids, attention_mask=attention_mask).logits
                preds = torch.argmax(outputs, dim=1).cpu()
                for t, p in zip(targets.view(-1), preds.view(-1)):
                    confusion_matrix[t.long(), p.long()] += 1    
            if normalize:
                confusion_matrix = confusion_matrix / confusion_matrix.sum(dim=0)
        return confusion_matrix

    def get_most_confused_per_class(self, confusion_matrix):
        idx = torch.arange(len(confusion_matrix))
        cnf = confusion_matrix.fill_diagonal_(0).max(dim=1)[1]
        return torch.stack((idx, cnf)).T.tolist()

class TargetedMixturesCollator:
    def __init__(self, tokenize_fn, transform, target_pairs=[], target_prob=1.0, num_classes=4):
        self.tokenize_fn = tokenize_fn
        self.transform = transform
        self.target_pairs = target_pairs
        self.target_prob = target_prob
        self.num_classes = num_classes
        print("TargetedMixturesCollator initialized with {}".format(transform.__class__.__name__))
        
    def __call__(self, batch):
        text = [x['text'] for x in batch]
        labels = [x['label'] for x in batch]
        batch = (text, labels)
        batch = self.transform(
            batch, 
            self.target_pairs,   
            self.target_prob,
            self.num_classes
        )
        text, labels = batch
        batch = self.tokenize_fn(text)
        batch['labels'] = torch.tensor(labels)
        return batch
    
class DefaultCollator:
    def __init__(self):
        pass
    def __call__(self, batch):
        return torch.utils.data.dataloader.default_collate(batch)

In [4]:
MODEL_NAMES = ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']
ts = [TextMix(), SentMix(), WordMix()]

In [None]:
results = []

for MODEL_NAME in MODEL_NAMES:
        
    for t in ts: 
        
        t_str = t.__class__.__name__
        checkpoint = './results/' + MODEL_NAME + '-targeted-' + t_str
        
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=4).to(device)

        dataset = load_dataset('ag_news', split='train') 
        dataset_dict = dataset.train_test_split(
            test_size = 0.05,
            train_size = 0.95,
            shuffle = True
        )
        train_dataset = dataset_dict['train']
        eval_dataset = dataset_dict['test']

        test_dataset = load_dataset('ag_news', split='test') 
        test_dataset.rename_column_('label', 'labels')
        test_dataset = test_dataset.map(tokenize, batched=True, batch_size=len(test_dataset))
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        
        train_batch_size = 6
        eval_batch_size = 32
        num_epoch = 10
        gradient_accumulation_steps = 1
        max_steps = int((len(train_dataset) * num_epoch / gradient_accumulation_steps) / train_batch_size)

        tmcb = TargetedMixturesCallback(
            dataloader=DataLoader(eval_dataset, batch_size=32),
            device=device
        )
        escb = EarlyStoppingCallback(
            early_stopping_patience=10
        )
        tmc = TargetedMixturesCollator(
            tokenize_fn=tokenize_fn, 
            transform=t,
            target_prob=0.5
        )

        training_args = TrainingArguments(\
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps, 
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=1000,
            logging_first_step=True,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            greater_is_better=True,
            evaluation_strategy="steps",
            remove_unused_columns=False
        )

        trainer = TargetedTrainer(
            model=model, 
            tokenizer=tokenizer,
            args=training_args,
            compute_metrics=compute_metrics_w_soft_target,                  
            train_dataset=train_dataset,         
            eval_dataset=eval_dataset,
            data_collator=tmc,
            callbacks=[tmcb, escb]
        )

        trainer.train()

        # test with ORIG data
        trainer.eval_dataset = test_dataset
        trainer.compute_metrics = compute_metrics
        trainer.data_collator = DefaultCollator()
        trainer.remove_callback(tmcb)

        out_orig = trainer.evaluate()
        out_orig['run'] = checkpoint
        out_orig['test'] = "ORIG"
        print('ORIG for {}\n{}'.format(checkpoint, out_orig))

        results.append(out_orig)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with TextMix


W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.2526,0.919768,0.631333,45.6923,131.313
2000,0.8335,0.810204,0.645148,45.5798,131.637
3000,0.7777,0.789817,0.664317,46.2387,129.761
4000,0.7649,0.785819,0.657255,46.5831,128.802
5000,0.7357,0.782604,0.666495,45.8065,130.986
6000,0.7394,0.778329,0.735231,45.5363,131.763
7000,0.7449,0.801634,0.705663,46.013,130.398
8000,0.7486,0.769013,0.619499,45.5129,131.831
9000,0.7507,0.7835,0.72346,46.0417,130.317
10000,0.7576,0.810306,0.707109,45.343,132.325


New targets: [[0, 2], [1, 0], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 

ORIG for ./results/bert-base-uncased-targeted-TextMix
{'eval_loss': 23.948352813720703, 'eval_accuracy': 0.9246052631578947, 'eval_f1': 0.9246351834327998, 'eval_precision': 0.9259357368580343, 'eval_recall': 0.9246052631578947, 'eval_runtime': 100.3019, 'eval_samples_per_second': 75.771, 'epoch': 1.21, 'run': './results/bert-base-uncased-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.244,0.931424,0.616449,45.5665,131.676
2000,0.8532,0.834879,0.648995,45.5929,131.6
3000,0.7694,0.84633,0.623239,45.5484,131.728
4000,0.7841,0.825528,0.646126,45.8531,130.853
5000,0.761,0.79758,0.717232,45.4688,131.959
6000,0.7615,0.841722,0.682945,45.3995,132.16
7000,0.7529,0.792255,0.639005,45.2966,132.46
8000,0.7384,0.77115,0.690759,45.5661,131.677
9000,0.7539,0.829266,0.651179,45.8228,130.939
10000,0.7383,0.85799,0.638667,45.7756,131.074


New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
early_stopping_patience_counter


ORIG for ./results/bert-base-uncased-targeted-SentMix
{'eval_loss': 22.313922882080078, 'eval_accuracy': 0.9180263157894737, 'eval_f1': 0.9179626120032178, 'eval_precision': 0.9181496129451846, 'eval_recall': 0.9180263157894737, 'eval_runtime': 99.9503, 'eval_samples_per_second': 76.038, 'epoch': 0.79, 'run': './results/bert-base-uncased-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1982,0.970566,0.599436,45.1173,132.987
2000,0.9106,0.886178,0.597241,45.9048,130.705
3000,0.8381,0.877627,0.568523,45.1863,132.784
4000,0.7982,0.857383,0.573596,45.1208,132.976
5000,0.8032,0.839064,0.594934,45.5642,131.682
6000,0.7917,0.820047,0.633415,45.3099,132.421
7000,0.7789,0.836729,0.598929,44.9337,133.53
8000,0.7824,0.841498,0.604356,45.144,132.908
9000,0.804,0.838745,0.606598,45.3139,132.41
10000,0.7813,0.889793,0.583556,45.1265,132.959


New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
early_stopping_patience_counter


ORIG for ./results/bert-base-uncased-targeted-WordMix
{'eval_loss': 20.192277908325195, 'eval_accuracy': 0.9017105263157895, 'eval_f1': 0.90160491349668, 'eval_precision': 0.9016304552985033, 'eval_recall': 0.9017105263157895, 'eval_runtime': 99.3157, 'eval_samples_per_second': 76.524, 'epoch': 0.84, 'run': './results/bert-base-uncased-targeted-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1711,0.777479,0.696322,43.8513,136.826
2000,0.7834,0.821144,0.621363,43.7285,137.21
3000,0.7618,0.784308,0.708993,43.9787,136.43
4000,0.7423,0.831048,0.646918,44.3818,135.19
5000,0.7519,0.788297,0.68078,43.2376,138.768
6000,0.7536,0.886401,0.690509,44.2324,135.647
7000,0.763,0.801046,0.702739,43.4072,138.226
8000,0.7592,0.812113,0.699046,43.7802,137.048
9000,0.7512,0.822854,0.709752,44.1168,136.003
10000,0.7392,0.805018,0.732566,43.5644,137.727


New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 1], [1, 0], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 

ORIG for ./results/roberta-base-targeted-TextMix
{'eval_loss': 24.647722244262695, 'eval_accuracy': 0.9227631578947368, 'eval_f1': 0.922686848959688, 'eval_precision': 0.9234344268649703, 'eval_recall': 0.9227631578947368, 'eval_runtime': 98.622, 'eval_samples_per_second': 77.062, 'epoch': 1.26, 'run': './results/roberta-base-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1568,0.806368,0.697237,42.806,140.167
2000,0.773,0.814275,0.664701,42.5112,141.139
3000,0.7532,0.840406,0.674202,42.4422,141.369
4000,0.7643,0.799433,0.680204,43.6143,137.57
5000,0.7834,0.806203,0.709784,43.3389,138.444
6000,0.7566,0.835609,0.737616,42.5277,141.084
7000,0.7509,0.800669,0.709315,43.1767,138.964
8000,0.7645,0.769141,0.715467,43.1797,138.954
9000,0.7434,0.789614,0.647352,42.5201,141.11
10000,0.7426,0.77525,0.694938,43.1713,138.981


New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 1], [1, 0], [2, 0], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
early_stopping_patience_counter


ORIG for ./results/roberta-base-targeted-SentMix
{'eval_loss': 25.880184173583984, 'eval_accuracy': 0.9156578947368421, 'eval_f1': 0.9155026600303513, 'eval_precision': 0.9180396179041801, 'eval_recall': 0.9156578947368421, 'eval_runtime': 98.7575, 'eval_samples_per_second': 76.956, 'epoch': 0.84, 'run': './results/roberta-base-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1741,0.874719,0.648887,45.5635,131.684
2000,0.8543,0.863883,0.5993,44.1175,136.0
3000,0.8129,0.825971,0.556426,44.5798,134.59
4000,0.7916,0.834896,0.604397,44.2077,135.723
5000,0.8001,0.829944,0.540258,45.3917,132.183
6000,0.8027,0.825413,0.54849,45.7146,131.249
7000,0.804,0.860661,0.596208,45.0408,133.212
8000,0.8152,0.876942,0.588277,44.7518,134.073
9000,0.8145,0.854369,0.580123,45.1462,132.902
10000,0.7912,0.833423,0.580528,44.4354,135.028


New targets: [[0, 1], [1, 0], [2, 0], [3, 0]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 0]]
New targets: [[0, 2], [1, 2], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 1], [1, 2], [2, 3], [3, 0]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 

ORIG for ./results/roberta-base-targeted-WordMix
{'eval_loss': 20.235855102539062, 'eval_accuracy': 0.8998684210526315, 'eval_f1': 0.9003638006056494, 'eval_precision': 0.9047092816783304, 'eval_recall': 0.8998684210526315, 'eval_runtime': 98.7674, 'eval_samples_per_second': 76.948, 'epoch': 1.42, 'run': './results/roberta-base-targeted-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1437,0.869547,0.65112,108.7134,55.191
2000,0.8602,0.850099,0.641214,105.8761,56.67
3000,0.7812,0.813988,0.635406,106.6824,56.242
4000,0.7541,0.826314,0.67605,109.2829,54.903
5000,0.7682,0.784603,0.702011,106.5637,56.304
6000,0.7589,0.780498,0.645365,108.6935,55.201
7000,0.7555,0.861906,0.645757,108.0882,55.51
8000,0.7698,0.801567,0.67988,108.2528,55.426


New targets: [[0, 2], [1, 0], [2, 3], [3, 0]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 0]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]


In [None]:
df = pd.DataFrame(results)
df

In [None]:
df.to_csv('train_AG_NEWS_targeted_r1.csv')

In [None]:
df.to_clipboard(excel=True)

In [None]:
# ORIG for ./results/bert-base-uncased-targeted-TextMix
# {'eval_loss': 31.2364559173584, 'eval_accuracy': 0.9381578947368421, 'eval_f1': 0.9381945850526017, 'eval_precision': 0.938240633851668, 'eval_recall': 0.9381578947368421, 'eval_runtime': 117.2622, 'eval_samples_per_second': 64.812, 'epoch': 5.0, 'run': 'TextMix', 'test': 'ORIG'}