# Targeted SIB Training

In [1]:
# Re-import edited modules automatically before each cell executes, so changes
# to local code (e.g. the `transforms` module) apply without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [2]:
import gc

import pandas as pd
import torch
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from torch.utils.data import DataLoader
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    TrainerCallback,
    EarlyStoppingCallback
)
from transformers.trainer_callback import TrainerControl

from transforms import TextMix, SentMix, WordMix

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
def tokenize_fn(text):
    """
    Encode ``text`` with the module-level ``tokenizer`` and return PyTorch tensors.

    Padding and truncation are enabled, with a maximum length of 250.
    """
    encode_kwargs = dict(padding=True, truncation=True, max_length=250, return_tensors='pt')
    return tokenizer(text, **encode_kwargs)

def tokenize(batch):
    """
    Encode the ``'text'`` entry of ``batch`` with the module-level ``tokenizer``.

    Same padding/truncation settings as ``tokenize_fn`` (max length 250), but
    without ``return_tensors``, so the tokenizer's default output type is used.
    """
    texts = batch['text']
    return tokenizer(texts, padding=True, truncation=True, max_length=250)

def acc_at_k(y_true, y_pred, k=2):
    """
    Weighted top-k agreement between target weights and predicted scores.

    For every row, the ``k`` largest entries of ``y_true`` and of ``y_pred``
    are taken along the last dimension (largest first). A rank position is a
    hit when both select the same class index; each hit contributes the target
    weight found at that position. The summed weight is divided by the number
    of rows (``len(y_true)``).

    :param y_true: target weights, expected as (batch, num_classes); a tensor
        or anything ``torch.as_tensor`` accepts (list, numpy array, ...).
    :param y_pred: predicted scores, same layout as ``y_true``.
    :param k: number of top entries compared per row.
    :return: the score as a Python float; ``0.0`` when there are no rows.
    """
    # as_tensor passes tensors (including Tensor subclasses) through unchanged
    # and only converts non-tensor inputs.
    y_true = torch.as_tensor(y_true)
    y_pred = torch.as_tensor(y_pred)
    total = len(y_true)
    if total == 0:
        # Nothing to score: avoid topk on an empty tensor and a 0/0 division.
        return 0.0
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    _, out_idx = torch.topk(y_pred, k=k, dim=-1)
    # Position-wise comparison: the i-th ranked target class must equal the
    # i-th ranked predicted class for that position's weight to count.
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) Loss

    Every sample is flattened to a single vector, log-softmax is applied to the
    logits over that vector, and the loss is the negative sum of
    ``target * log_softmax(logits)``.

    :param logits: (batch, *)
    :param target: (batch, *) same shape as logits, each item must be a valid distribution: target[i, :].sum() == 1.
    :param reduction: 'none' returns the per-sample losses (shape ``(batch,)``),
        'mean' their average, 'sum' their total.
    :raises NotImplementedError: for any other ``reduction`` value.
    """
    # reshape (not view) so non-contiguous inputs, e.g. transposed or permuted
    # tensors, are accepted; it still returns a view whenever one is possible.
    flat_logits = logits.reshape(logits.shape[0], -1)
    flat_target = target.reshape(target.shape[0], -1)
    logprobs = torch.nn.functional.log_softmax(flat_logits, dim=1)
    batchloss = - torch.sum(flat_target * logprobs, dim=1)
    if reduction == 'none':
        return batchloss
    elif reduction == 'mean':
        return torch.mean(batchloss)
    elif reduction == 'sum':
        return torch.sum(batchloss)
    else:
        raise NotImplementedError('Unsupported reduction mode.')

def compute_metrics(pred):
    """
    Hard-label metrics for a prediction object.

    Predicted classes are the argmax of ``pred.predictions`` over the last
    axis and are compared against ``pred.label_ids``. Precision, recall and F1
    are computed per class and then averaged without weighting.

    :return: dict with 'accuracy', 'f1', 'precision' and 'recall'.
    """
    y_true = pred.label_ids
    y_hat = pred.predictions.argmax(-1)
    per_class = precision_recall_fscore_support(y_true, y_hat, average=None)
    class_precision, class_recall, class_f1 = per_class[0], per_class[1], per_class[2]
    metrics = {'accuracy': accuracy_score(y_true, y_hat)}
    metrics['f1'] = class_f1.mean()
    metrics['precision'] = class_precision.mean()
    metrics['recall'] = class_recall.mean()
    return metrics
        
def compute_metrics_w_soft_target(pred):
    """
    Soft-label metric for a prediction object.

    Scores ``pred.predictions`` against ``pred.label_ids`` with ``acc_at_k``
    at k=2 and reports the result under the 'accuracy' key.
    """
    score = acc_at_k(pred.label_ids, pred.predictions, k=2)
    return {'accuracy': score}

class TargetedTrainer(Trainer):
    """Trainer that optimises the soft-target cross entropy ``CEwST_loss``."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """
        Compute the soft-target cross entropy for one batch.

        :param model: model called as ``model(**inputs)``.
        :param inputs: batch dict; its "labels" entry is removed (the dict is
            mutated) and used as the soft targets, so the model itself never
            receives labels.
        :param return_outputs: when true, return ``(loss, outputs)``.
        :param num_items_in_batch: accepted and ignored. Newer ``Trainer``
            versions pass this keyword to ``compute_loss``; without it the
            override would raise a ``TypeError`` there.
        :return: the loss, or ``(loss, outputs)`` if ``return_outputs``.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        # The first element of the outputs is used as the logits.
        logits = outputs[0]
        loss = CEwST_loss(logits, labels)
        if return_outputs:
            return loss, outputs
        return loss

class TargetedMixturesCallback(TrainerCallback):
    """
    A callback that calculates a confusion matrix on the validation
    data and returns the most confused class pairings.
    """
    def __init__(self, dataloader, device):
        """
        :param dataloader: iterable of batches exposing 'text' and 'label'
            entries; its ``dataset['label']`` is used to count the classes.
        :param device: device the tokenized inputs are moved to before the
            forward pass.
        """
        self.dataloader = dataloader
        self.device = device

    def on_evaluate(self, args, state, control, model, tokenizer, **kwargs):
        """
        Recompute the most-confused class pairs after an evaluation.

        The pairs are printed and published as ``TrainerControl.new_targets``.
        The ``control`` object received from the trainer is updated and
        returned, so flags set by other callbacks are preserved.
        """
        cnf_mat = self.get_confusion_matrix(model, tokenizer, self.dataloader)
        new_targets = self.get_most_confused_per_class(cnf_mat)
        print("New targets:", new_targets)
        # Stored on the TrainerControl class, so the value is readable through
        # the class and through any instance.
        # NOTE(review): no consumer of `new_targets` is visible in this file;
        # confirm who reads it.
        TrainerControl.new_targets = new_targets
        # Only ever request a stop; never clear a stop another callback
        # (e.g. early stopping) has already requested.
        if state.global_step >= state.max_steps:
            control.should_training_stop = True
        return control

    def get_confusion_matrix(self, model, tokenizer, dataloader, normalize=True):
        """
        Build a (n_classes, n_classes) confusion matrix over ``dataloader``.

        Rows index the true label and columns the predicted label. With
        ``normalize`` each column is divided by its total, i.e. counts are
        expressed relative to how often a class was predicted.
        NOTE(review): normalisation is per predicted class (column sums);
        confirm that this, rather than per true class, is intended.
        """
        n_classes = max(dataloader.dataset['label']) + 1
        confusion_matrix = torch.zeros(n_classes, n_classes)
        with torch.no_grad():
            for batch in dataloader:
                data, targets = batch['text'], batch['label']
                data = tokenizer(data, padding=True, truncation=True, max_length=250, return_tensors='pt')
                input_ids = data['input_ids'].to(self.device)
                attention_mask = data['attention_mask'].to(self.device)
                outputs = model(input_ids, attention_mask=attention_mask).logits
                # The matrix lives on the CPU, so both index tensors are kept
                # there as well.
                preds = torch.argmax(outputs, dim=1).cpu().reshape(-1).long()
                targets = targets.cpu().reshape(-1).long()
                # accumulate=True adds one per (true, predicted) pair, also
                # when the same pair occurs several times in the batch.
                confusion_matrix.index_put_(
                    (targets, preds),
                    torch.ones(targets.numel()),
                    accumulate=True
                )
        if normalize:
            # Counts are non-negative integers, so the clamp only affects
            # columns of classes that were never predicted; they stay all-zero
            # instead of turning into NaN.
            column_totals = confusion_matrix.sum(dim=0).clamp(min=1)
            confusion_matrix = confusion_matrix / column_totals
        return confusion_matrix

    def get_most_confused_per_class(self, confusion_matrix):
        """
        For every row (true class) return the off-diagonal column with the
        largest value.

        :return: list of ``[class_index, most_confused_index]`` pairs.
        """
        idx = torch.arange(len(confusion_matrix))
        # Zero the diagonal on a copy so the caller's matrix is left intact.
        off_diagonal = confusion_matrix.clone().fill_diagonal_(0)
        cnf = off_diagonal.max(dim=1)[1]
        return torch.stack((idx, cnf)).T.tolist()

class TargetedMixturesCollator:
    """
    Data collator that applies a mixing transform to a batch of raw examples
    before tokenizing it.
    """
    def __init__(self, tokenize_fn, transform, target_pairs=None, target_prob=1.0, num_classes=4):
        """
        :param tokenize_fn: callable turning a list of texts into a batch
            mapping that accepts item assignment.
        :param transform: callable invoked as
            ``transform((texts, labels), target_pairs, target_prob, num_classes)``
            and returning a ``(texts, labels)`` pair.
        :param target_pairs: forwarded to ``transform`` unchanged; defaults to
            a fresh empty list per instance.
        :param target_prob: forwarded to ``transform`` unchanged.
        :param num_classes: forwarded to ``transform`` unchanged.
        """
        self.tokenize_fn = tokenize_fn
        self.transform = transform
        # A list literal as the default would be shared by every instance
        # created without `target_pairs`; build a new list each time instead.
        self.target_pairs = [] if target_pairs is None else target_pairs
        self.target_prob = target_prob
        self.num_classes = num_classes
        print("TargetedMixturesCollator initialized with {}".format(transform.__class__.__name__))

    def __call__(self, batch):
        """
        Collate a list of examples, each exposing 'text' and 'label'.

        :return: the output of ``tokenize_fn`` on the transformed texts, with
            the transformed labels added as a tensor under 'labels'.
        """
        text = [x['text'] for x in batch]
        labels = [x['label'] for x in batch]
        batch = (text, labels)
        batch = self.transform(
            batch,
            self.target_pairs,
            self.target_prob,
            self.num_classes
        )
        text, labels = batch
        batch = self.tokenize_fn(text)
        batch['labels'] = torch.tensor(labels)
        return batch
    
class DefaultCollator:
    """Stateless callable that delegates to PyTorch's ``default_collate``."""

    def __call__(self, batch):
        """Collate ``batch`` with ``torch.utils.data.dataloader.default_collate``."""
        collate = torch.utils.data.dataloader.default_collate
        return collate(batch)

In [4]:
# Pretrained checkpoints to fine-tune, one training run per (model, transform) pair.
MODEL_NAMES = [
    'bert-base-uncased',
    'roberta-base',
    'xlnet-base-cased',
]
# Mixing transforms handed to the training collator.
ts = [
    TextMix(),
    SentMix(),
    WordMix(),
]

In [5]:
# Train one model per (checkpoint, transform) pair on AG News and record the
# metrics obtained on the original, untransformed test split.
results = []

for MODEL_NAME in MODEL_NAMES:

    for t in ts:

        t_str = t.__class__.__name__
        checkpoint = './results/' + MODEL_NAME + '-targeted-' + t_str

        # `tokenizer` has to stay a module-level name: tokenize() and
        # tokenize_fn() look it up globally.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=4).to(device)

        dataset = load_dataset('ag_news', split='train')
        # Fixed seed so every run trains and validates on the same split and
        # the runs stay comparable and reproducible.
        dataset_dict = dataset.train_test_split(
            test_size = 0.05,
            train_size = 0.95,
            shuffle = True,
            seed = 1
        )
        train_dataset = dataset_dict['train']
        eval_dataset = dataset_dict['test']

        test_dataset = load_dataset('ag_news', split='test')
        test_dataset.rename_column_('label', 'labels')
        test_dataset = test_dataset.map(tokenize, batched=True, batch_size=len(test_dataset))
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])

        train_batch_size = 8
        eval_batch_size = 32
        num_epoch = 10
        gradient_accumulation_steps = 1
        max_steps = int((len(train_dataset) * num_epoch / gradient_accumulation_steps) / train_batch_size)

        tmcb = TargetedMixturesCallback(
            dataloader=DataLoader(eval_dataset, batch_size=eval_batch_size),
            device=device
        )
        escb = EarlyStoppingCallback(
            early_stopping_patience=10
        )
        # NOTE(review): `target_pairs` is left at its default here and nothing
        # in this notebook assigns the callback's new targets to the collator;
        # confirm how the transform receives them.
        tmc = TargetedMixturesCollator(
            tokenize_fn=tokenize_fn,
            transform=t,
            target_prob=0.5
        )

        training_args = TrainingArguments(
            seed=1,
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=1000,
            logging_first_step=True,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            greater_is_better=True,
            evaluation_strategy="steps",
            remove_unused_columns=False
        )

        trainer = TargetedTrainer(
            model=model,
            tokenizer=tokenizer,
            args=training_args,
            compute_metrics=compute_metrics_w_soft_target,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            data_collator=tmc,
            callbacks=[tmcb, escb]
        )

        trainer.train()

        # test with ORIG data: hard-label metrics, plain collation, and no
        # confusion-matrix callback during this final evaluation.
        trainer.eval_dataset = test_dataset
        trainer.compute_metrics = compute_metrics
        trainer.data_collator = DefaultCollator()
        trainer.remove_callback(tmcb)

        out_orig = trainer.evaluate()
        out_orig['run'] = checkpoint
        out_orig['test'] = "ORIG"
        print('ORIG for {}\n{}'.format(checkpoint, out_orig))

        results.append(out_orig)

        # Drop this run's model and trainer before the next checkpoint is
        # loaded; otherwise they stay referenced while the next model is being
        # moved to the GPU. The recorded run ended in a CUDA out-of-memory
        # error after several consecutive runs.
        del trainer, model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


TargetedMixturesCollator initialized with TextMix


W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1632,0.841849,0.658472,46.2059,129.854
2000,0.8232,0.809177,0.664699,46.207,129.85
3000,0.7808,0.771794,0.658102,45.5575,131.702
4000,0.7644,0.780057,0.680474,45.9357,130.617
5000,0.7684,0.758096,0.720509,45.9699,130.52
6000,0.7443,0.766487,0.7006,45.6288,131.496
7000,0.7601,0.745382,0.689125,46.3876,129.345
8000,0.751,0.758748,0.729534,45.9395,130.606
9000,0.7413,0.771517,0.650278,45.7427,131.168
10000,0.7321,0.769781,0.710425,46.191,129.895


New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 

ORIG for ./results/bert-base-uncased-targeted-TextMix
{'eval_loss': 25.431032180786133, 'eval_accuracy': 0.9367105263157894, 'eval_f1': 0.9365973002462261, 'eval_precision': 0.9366754242217419, 'eval_recall': 0.9367105263157895, 'eval_runtime': 100.5442, 'eval_samples_per_second': 75.589, 'epoch': 2.74, 'run': './results/bert-base-uncased-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1745,0.849899,0.661589,45.518,131.816
2000,0.8231,0.817805,0.639086,45.891,130.745
3000,0.7792,0.785314,0.5981,46.9932,127.678
4000,0.7799,0.78605,0.596946,46.2917,129.613
5000,0.7627,0.763109,0.693854,46.0572,130.273
6000,0.7559,0.82014,0.660154,46.198,129.876
7000,0.7569,0.775092,0.645828,46.0246,130.365
8000,0.7413,0.769967,0.658169,46.0848,130.195
9000,0.7621,0.778055,0.677989,46.0019,130.429
10000,0.7672,0.760361,0.667743,45.2443,132.613


New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 

ORIG for ./results/bert-base-uncased-targeted-SentMix
{'eval_loss': 31.42345428466797, 'eval_accuracy': 0.9355263157894737, 'eval_f1': 0.93549770002292, 'eval_precision': 0.9355946206596242, 'eval_recall': 0.9355263157894738, 'eval_runtime': 100.4882, 'eval_samples_per_second': 75.631, 'epoch': 7.72, 'run': './results/bert-base-uncased-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1564,0.90859,0.640362,46.3539,129.439
2000,0.8701,0.965018,0.565409,46.3131,129.553
3000,0.8251,0.853418,0.585338,46.8901,127.959
4000,0.8043,0.809024,0.623672,46.9979,127.665
5000,0.8015,0.837337,0.601925,46.7242,128.413
6000,0.7934,0.803579,0.617895,46.6493,128.619
7000,0.7954,0.805269,0.599631,46.661,128.587
8000,0.7932,0.816359,0.595833,46.8059,128.189
9000,0.794,0.830895,0.612883,46.4448,129.186
10000,0.7964,0.807267,0.54268,46.3717,129.389


New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 0]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 0], [3, 2]]
New targets: [[0, 1], [1, 3], [2, 3], [3, 2]]
early_stopping_patience_counter


ORIG for ./results/bert-base-uncased-targeted-WordMix
{'eval_loss': 17.457401275634766, 'eval_accuracy': 0.9039473684210526, 'eval_f1': 0.9036912981525473, 'eval_precision': 0.9036114688366205, 'eval_recall': 0.9039473684210526, 'eval_runtime': 100.2085, 'eval_samples_per_second': 75.842, 'epoch': 0.98, 'run': './results/bert-base-uncased-targeted-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1127,0.771963,0.66831,45.4401,132.042
2000,0.7832,0.80331,0.695313,45.3977,132.165
3000,0.7656,0.781338,0.743023,45.671,131.374
4000,0.7652,0.820864,0.646053,45.1035,133.027
5000,0.7499,0.77644,0.648404,45.3981,132.164
6000,0.7549,0.798856,0.709035,45.336,132.345
7000,0.7649,0.792176,0.730105,45.7321,131.199
8000,0.7582,0.773055,0.731464,45.0495,133.187
9000,0.7445,0.795473,0.761945,44.5878,134.566
10000,0.7488,0.794539,0.682491,44.9722,133.416


New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 0]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 3], [2, 3], [3, 2]]
early_stopping_patience_counter


ORIG for ./results/roberta-base-targeted-TextMix
{'eval_loss': 24.688467025756836, 'eval_accuracy': 0.9221052631578948, 'eval_f1': 0.9224425641357582, 'eval_precision': 0.9234926254741409, 'eval_recall': 0.9221052631578948, 'eval_runtime': 99.0346, 'eval_samples_per_second': 76.741, 'epoch': 1.33, 'run': './results/roberta-base-targeted-TextMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with SentMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.1015,0.759651,0.720774,45.7824,131.055
2000,0.7905,0.776611,0.684789,45.9391,130.608
3000,0.7665,0.782814,0.637705,45.9802,130.491
4000,0.7673,0.796187,0.680435,45.9693,130.522
5000,0.7819,0.794483,0.744525,45.5761,131.648
6000,0.7723,0.78691,0.671541,45.8378,130.896
7000,0.7576,0.811842,0.736622,46.1337,130.057
8000,0.7469,0.768118,0.687954,45.6212,131.518
9000,0.7652,0.775836,0.705598,46.0302,130.349
10000,0.7702,0.786508,0.637667,45.8137,130.965


New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
early_stopping_patience_counter


ORIG for ./results/roberta-base-targeted-SentMix
{'eval_loss': 23.09513282775879, 'eval_accuracy': 0.9173684210526316, 'eval_f1': 0.9173812746159944, 'eval_precision': 0.9185799634956395, 'eval_recall': 0.9173684210526316, 'eval_runtime': 98.8003, 'eval_samples_per_second': 76.923, 'epoch': 1.05, 'run': './results/roberta-base-targeted-SentMix', 'test': 'ORIG'}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with WordMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.0976,0.832878,0.643672,44.2526,135.585
2000,0.8448,0.842358,0.570324,43.867,136.777
3000,0.8187,0.829623,0.576125,44.419,135.077
4000,0.797,0.826187,0.570909,43.5002,137.93
5000,0.8016,0.85351,0.550424,44.4935,134.851
6000,0.807,0.821086,0.534845,44.9367,133.521
7000,0.8061,0.813902,0.587292,44.6488,134.382
8000,0.8041,0.802383,0.622662,44.8298,133.84
9000,0.809,0.803633,0.608604,44.0664,136.158
10000,0.7899,0.829861,0.605293,44.1718,135.833


New targets: [[0, 1], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 0]]
New targets: [[0, 1], [1, 3], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 2], [2, 3], [3, 2]]
New targets: [[0, 1], [1, 2], [2, 0], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]
New targets: [[0, 3], [1, 0], [2, 

ORIG for ./results/roberta-base-targeted-WordMix
{'eval_loss': 17.9605770111084, 'eval_accuracy': 0.8863157894736842, 'eval_f1': 0.8870375967397961, 'eval_precision': 0.8898028670023486, 'eval_recall': 0.8863157894736843, 'eval_runtime': 98.2325, 'eval_samples_per_second': 77.367, 'epoch': 2.18, 'run': './results/roberta-base-targeted-WordMix', 'test': 'ORIG'}


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.



TargetedMixturesCollator initialized with TextMix


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1000,1.0688,0.823994,0.652225,108.4786,55.31


New targets: [[0, 2], [1, 0], [2, 3], [3, 2]]


RuntimeError: CUDA out of memory. Tried to allocate 24.00 MiB (GPU 0; 6.00 GiB total capacity; 4.20 GiB already allocated; 5.62 MiB free; 4.57 GiB reserved in total by PyTorch)

In [None]:
# Gather the per-run evaluation dicts collected in `results` into one table and display it.
df = pd.DataFrame(results)
df

In [None]:
# Persist the results table as a CSV file (path is relative to the working directory).
df.to_csv('train_AG_NEWS_targeted_r1.csv')

In [None]:
# Copy the results table to the system clipboard in a format that pastes into Excel.
df.to_clipboard(excel=True)

In [None]:
# Reference result recorded from a different run, kept for comparison:
# ORIG for ./results/bert-base-uncased-targeted-TextMix
# {'eval_loss': 31.2364559173584, 'eval_accuracy': 0.9381578947368421, 'eval_f1': 0.9381945850526017, 'eval_precision': 0.938240633851668, 'eval_recall': 0.9381578947368421, 'eval_runtime': 117.2622, 'eval_samples_per_second': 64.812, 'epoch': 5.0, 'run': 'TextMix', 'test': 'ORIG'}