In [1]:
from utils import *

from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback
from datasets import load_dataset, concatenate_datasets, Dataset
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

In [2]:
def one_hot_encode(y, nb_classes=4):
    if not isinstance(y, np.ndarray):
        y = np.expand_dims(np.array(y), 0)
    res = np.eye(nb_classes)[np.array(y).reshape(-1)]
    return res.reshape(list(y.shape)+[nb_classes])[0]

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=250)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=None)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1.mean(),
        'precision': precision.mean(),
        'recall': recall.mean()
    }

def acc_at_k(y_true, y_pred, k=2):
    y_true = torch.tensor(y_true) if type(y_true) != torch.Tensor else y_true
    y_pred = torch.tensor(y_pred) if type(y_pred) != torch.Tensor else y_pred
    total = len(y_true)
    y_weights, y_idx = torch.topk(y_true, k=k, dim=-1)
    out_weights, out_idx = torch.topk(y_pred, k=k, dim=-1)
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    acc = correct / total
    return acc.item()

def CEwST_loss(logits, target, reduction='mean'):
    """
    Cross Entropy with Soft Target (CEwST) Loss
    :param logits: (batch, *)
    :param target: (batch, *) same shape as logits, each item must be a valid distribution: target[i, :].sum() == 1.
    """
    logprobs = torch.nn.functional.log_softmax(logits.view(logits.shape[0], -1), dim=1)
    batchloss = - torch.sum(target.view(target.shape[0], -1) * logprobs, dim=1)
    if reduction == 'none':
        return batchloss
    elif reduction == 'mean':
        return torch.mean(batchloss)
    elif reduction == 'sum':
        return torch.sum(batchloss)
    else:
        raise NotImplementedError('Unsupported reduction mode.')

def compute_metrics_w_soft_target(pred):
    labels = pred.label_ids
    preds = pred.predictions
    acc = acc_at_k(labels, preds, k=2)
    return {
        'accuracy': acc,
    }

class Trainer_w_soft_target(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        loss = CEwST_loss(logits, labels)
        if return_outputs:
            return loss, outputs
        return loss
    
class DefaultCollator:
    def __init__(self):
        pass
    def __call__(self, batch):
        return torch.utils.data.dataloader.default_collate(batch)

In [3]:
from sklearn.datasets import fetch_20newsgroups

def get_20NG_test_dataset():
    cats = [
        'talk.politics.mideast',                                # Wolrd 0
        'rec.sport.hockey', 'rec.sport.baseball',               # Sports 1
        # 'misc.forsale',                                       # Business 2
        'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space', # Sci/Tech 3
    ]

    dataset = fetch_20newsgroups(
        subset='all',
        categories=cats,
        remove=('headers', 'footers', 'quotes')
    )

    df = pd.DataFrame([dataset.data, dataset.target]).T
    df.rename(columns={0:'text', 1: 'label'}, inplace=True)

    mapper = {
        0: 1,
        1: 1,
        2: 3,
        3: 3,
        4: 3,
        5: 3,
        6: 0,
    }

    df.label = df.label.map(mapper)
    df.text = df.text.replace('\n', ' ', regex=True).str.strip()

    test_dataset = Dataset.from_pandas(df)
    
    return test_dataset

In [4]:
# ['ORIG', 'INV', 'SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']
# ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']

In [5]:
MODEL_NAMES = ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased']
ts = ['ORIG', 'INV', 'SIB', 'INVSIB', 'TextMix', 'SentMix', 'WordMix']

In [None]:
use_pretrain = False

results = []
for MODEL_NAME in MODEL_NAMES:
    for t in ts: 
                        
        soft_target = False
        eval_only = False
        
        checkpoint = 'pretrained/' + MODEL_NAME + "-ag_news-ORIG+" + t 
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        
        if t == 'ORIG':
            train_dataset = load_dataset('ag_news', split='train')
        else:
            # load custom data    
            text = npy_load("./assets/AG_NEWS/" + t + "/text.npy")
            label = npy_load("./assets/AG_NEWS/" + t + "/label.npy")
            if len(label.shape) > 1:
                df = pd.DataFrame({'text': text, 'label': label.tolist()})
                df.text = df.text.astype(str)
                df.label = df.label.map(lambda y: np.array(y))
            else:
                df = pd.DataFrame({'text': text, 'label': label})
                df.text = df.text.astype(str)
                df.label = df.label.astype(object)
            train_dataset = Dataset.from_pandas(df)  
            
            # load orig data
            orig_dataset = load_dataset('ag_news', split='train')
            df = orig_dataset.to_pandas()
            df = df[df.columns[::-1]]
            df.text = df.text.astype(str)
            if len(label.shape) > 1:
                df.label = df.label.map(one_hot_encode)
            else:
                df.label = df.label.astype(object)
            orig_dataset = Dataset.from_pandas(df)
            
            # merge orig + custom data
            train_dataset = concatenate_datasets([orig_dataset, train_dataset])
            train_dataset.shuffle()
            
        if use_pretrain and os.path.exists(checkpoint):
            print('loading {}...'.format(checkpoint))
            MODEL_NAME = checkpoint
            eval_only = True
                
        dataset_dict = train_dataset.train_test_split(
            test_size = 0.05,
            train_size = 0.95,
            shuffle = True
        )
        train_dataset = dataset_dict['train']
        eval_dataset = dataset_dict['test']
        test_dataset = load_dataset('ag_news')['test']
        test_dataset_20NG = get_20NG_test_dataset()
        
        # # reduce training time
        # n = 10000
        # train_dataset = Dataset.from_dict(train_dataset[:n])
        # eval_dataset = Dataset.from_dict(eval_dataset[:n])
        # test_dataset = Dataset.from_dict(test_dataset[:n])

        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=4).to(device)
                
        train_dataset = train_dataset.map(tokenize, batched=True, batch_size=len(train_dataset))
        eval_dataset = eval_dataset.map(tokenize, batched=True, batch_size=len(eval_dataset))
        test_dataset = test_dataset.map(tokenize, batched=True, batch_size=len(test_dataset))
        test_dataset_20NG = test_dataset_20NG.map(tokenize, batched=True, batch_size=len(test_dataset_20NG))
        train_dataset.rename_column_('label', 'labels')
        eval_dataset.rename_column_('label', 'labels')
        test_dataset.rename_column_('label', 'labels')
        test_dataset_20NG.rename_column_('label', 'labels')
        train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        eval_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset_20NG.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
        
        if len(np.array(train_dataset['labels']).shape) > 1:
            soft_target = True

        train_batch_size = 3
        eval_batch_size = 32
        num_epoch = 3
        gradient_accumulation_steps=1
        max_steps = int((len(train_dataset) * num_epoch / gradient_accumulation_steps) / train_batch_size)
        
        training_args = TrainingArguments(
            seed=1,
            # adafactor=True,
            output_dir=checkpoint,
            overwrite_output_dir=True,
            max_steps=max_steps,
            save_steps=int(max_steps / 10),
            save_total_limit=1,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=eval_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps, 
            warmup_steps=int(max_steps / 10),
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=5000,
            logging_first_step=True,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            greater_is_better=True,
            evaluation_strategy="steps",
            run_name=checkpoint,
            label_names=['World', 'Sports', 'Business', 'Sci/Tech']
        )
        
        if soft_target:
            trainer = Trainer_w_soft_target(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics_w_soft_target,
                train_dataset=train_dataset,
                eval_dataset=eval_dataset,
                data_collator=DefaultCollator(),
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )
        else: 
            trainer = Trainer(
                model=model,
                args=training_args,
                compute_metrics=compute_metrics,
                train_dataset=train_dataset,
                eval_dataset=test_dataset,
                callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
            )

        if not eval_only:
            trainer.train()
            
        trainer.compute_metrics = compute_metrics
        
        # test with ORIG data
        trainer.eval_dataset = test_dataset
        out_orig = trainer.evaluate()
        out_orig['run'] = checkpoint
        out_orig['test'] = "ORIG"
        print('ORIG for {}\n{}'.format(checkpoint, out_orig))
        
        # test with 20NG data
        trainer.eval_dataset = test_dataset_20NG
        out_20NG = trainer.evaluate()
        out_20NG['run'] = checkpoint
        out_20NG['test'] = "20NG"
        print('20NG for {}\n{}'.format(checkpoint, out_20NG))
        
        results.append(out_orig)
        results.append(out_20NG)
        
        # run.finish()

Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Loading cached split indices for dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-7b0d9dab769440fa.arrow and C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-f29fa025f9f2918b.arrow
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictio

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

rename_column_ is deprecated and will be removed in the next major version of datasets. Use the dataset.rename_column method instead.





W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
5000,0.5568,0.492646,0.903684,0.902957,0.903823,0.903684,98.9523,76.805
10000,0.4594,0.490654,0.908816,0.908583,0.91079,0.908816,98.868,76.87
15000,0.5057,0.539438,0.897763,0.896793,0.90032,0.897763,98.8211,76.907
20000,0.7025,0.835784,0.601711,0.545635,0.625709,0.601711,98.8569,76.879
25000,0.624,0.597516,0.898553,0.898295,0.900086,0.898553,98.8571,76.879
30000,0.6921,0.698522,0.682237,0.608888,0.832951,0.682237,98.6748,77.021
35000,0.8007,1.066945,0.458947,0.342833,0.314954,0.458947,98.672,77.023
40000,1.0376,1.185325,0.395,0.295704,0.417244,0.395,98.5612,77.109
45000,1.1136,1.158277,0.394868,0.295475,0.318003,0.394868,98.5912,77.086
50000,1.124,1.218403,0.367895,0.270271,0.31627,0.367895,98.6045,77.076


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+ORIG
{'eval_loss': 0.49065423011779785, 'eval_accuracy': 0.9088157894736842, 'eval_f1': 0.9085834129134183, 'eval_precision': 0.9107896040793245, 'eval_recall': 0.9088157894736842, 'eval_runtime': 99.469, 'eval_samples_per_second': 76.406, 'epoch': 1.58, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+ORIG', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+ORIG
{'eval_loss': 0.9149090051651001, 'eval_accuracy': 0.8270152505446623, 'eval_f1': 0.5886300217562167, 'eval_precision': 0.615185509988166, 'eval_recall': 0.5785591308748507, 'eval_runtime': 90.3274, 'eval_samples_per_second': 76.223, 'epoch': 1.58, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+ORIG', 'test': '20NG'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.





Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Runtime,Samples Per Second
5000,0.647,0.460409,0.910658,0.910573,0.911071,0.910658,99.3448,76.501
10000,0.4757,0.436451,0.913816,0.9137,0.915907,0.913816,99.4056,76.454
15000,0.4927,0.426313,0.914474,0.914202,0.914346,0.914474,99.4474,76.422
20000,0.4911,0.441877,0.914737,0.91496,0.915563,0.914737,99.3451,76.501
25000,0.548,0.488029,0.902632,0.90296,0.903743,0.902632,99.2754,76.555
30000,0.6548,0.642445,0.871974,0.871628,0.874178,0.871974,99.1917,76.619
35000,1.2265,1.402693,0.25,0.1,0.0625,0.25,98.9879,76.777
40000,1.3755,1.359066,0.280789,0.158534,0.305398,0.280789,98.939,76.815
45000,1.3885,1.391648,0.25,0.1,0.0625,0.25,98.8465,76.887
50000,1.402,1.392966,0.25,0.1,0.0625,0.25,98.8586,76.878


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+INV
{'eval_loss': 0.4418773651123047, 'eval_accuracy': 0.9147368421052632, 'eval_f1': 0.9149598750102417, 'eval_precision': 0.9155630970986736, 'eval_recall': 0.9147368421052632, 'eval_runtime': 99.6746, 'eval_samples_per_second': 76.248, 'epoch': 0.92, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+INV', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+INV
{'eval_loss': 0.8254008293151855, 'eval_accuracy': 0.8059549745824256, 'eval_f1': 0.6109898426963128, 'eval_precision': 0.6254659259028686, 'eval_recall': 0.6127472354625891, 'eval_runtime': 90.424, 'eval_samples_per_second': 76.141, 'epoch': 0.92, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+INV', 'test': '20NG'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.7472,0.5716,0.792313,156.887,76.488
10000,0.5659,0.584571,0.755744,156.948,76.458
15000,0.5832,0.609265,0.740225,157.3301,76.273
20000,0.5888,0.53811,0.779628,157.2328,76.32
25000,0.6188,0.644749,0.764242,157.3322,76.272
30000,0.644,0.59204,0.774857,157.1985,76.337
35000,0.6502,0.736152,0.749652,157.2706,76.302
40000,0.6375,0.655382,0.759354,157.2324,76.32
45000,0.6628,0.721688,0.754072,157.1994,76.336
50000,0.6627,0.698518,0.756261,156.9891,76.438


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+SIB
{'eval_loss': 27.09234046936035, 'eval_accuracy': 0.9101315789473684, 'eval_f1': 0.9098827601773326, 'eval_precision': 0.9099296533934368, 'eval_recall': 0.9101315789473684, 'eval_runtime': 99.608, 'eval_samples_per_second': 76.299, 'epoch': 0.72, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+SIB', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+SIB
{'eval_loss': 30.235000610351562, 'eval_accuracy': 0.8688453159041394, 'eval_f1': 0.6595550368211807, 'eval_precision': 0.6726531427695707, 'eval_recall': 0.6488029181145984, 'eval_runtime': 90.5378, 'eval_samples_per_second': 76.046, 'epoch': 0.72, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+SIB', 'test': '20NG'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.6839,0.567118,0.838544,157.8123,76.04
10000,0.5019,0.559269,0.83879,157.7106,76.089
15000,0.5167,0.50816,0.833746,157.6695,76.109
20000,0.5039,0.507507,0.823125,157.7561,76.067
25000,0.5549,0.575236,0.843242,157.6029,76.141
30000,0.6013,0.632859,0.807074,157.5383,76.172
35000,0.6156,0.63074,0.821004,157.2539,76.31
40000,0.9441,1.391537,0.256249,157.1745,76.348
45000,1.3999,1.39503,0.239917,157.0638,76.402
50000,1.4022,1.400608,0.258777,157.149,76.361


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+INVSIB
{'eval_loss': 29.666908264160156, 'eval_accuracy': 0.9122368421052631, 'eval_f1': 0.9119077705652677, 'eval_precision': 0.9121903860649965, 'eval_recall': 0.9122368421052631, 'eval_runtime': 99.59, 'eval_samples_per_second': 76.313, 'epoch': 0.99, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+INVSIB', 'test': 'ORIG'}


  _warn_prf(average, modifier, msg_start, len(result))


20NG for pretrained/bert-base-uncased-ag_news-ORIG+INVSIB
{'eval_loss': 33.68073272705078, 'eval_accuracy': 0.8259985475671751, 'eval_f1': 0.6284928916191954, 'eval_precision': 0.6394804266080366, 'eval_recall': 0.619568438928964, 'eval_runtime': 90.5794, 'eval_samples_per_second': 76.011, 'epoch': 0.99, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+INVSIB', 'test': '20NG'}


Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCla

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Loading cached processed dataset at C:\Users\sleev\.cache\huggingface\datasets\ag_news\default\0.0.0\fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a\cache-ad6459d6b11e013c.arrow





HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.


Step,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
5000,0.7335,0.563017,0.799593,157.4243,76.227
10000,0.5533,0.610933,0.792871,157.2682,76.303
15000,0.5714,0.559691,0.777036,157.2237,76.324
20000,0.5747,0.629329,0.767251,157.2867,76.294
25000,0.5946,0.571606,0.738264,157.1523,76.359
30000,0.6133,0.579555,0.7582,157.0933,76.388
35000,0.6052,0.576157,0.779512,157.0824,76.393
40000,0.5965,0.606168,0.759818,156.983,76.441
45000,0.5954,0.550446,0.787992,156.9571,76.454
50000,0.5934,0.616602,0.767983,157.0883,76.39


ORIG for pretrained/bert-base-uncased-ag_news-ORIG+TextMix
{'eval_loss': 28.266122817993164, 'eval_accuracy': 0.9084210526315789, 'eval_f1': 0.9080914367648951, 'eval_precision': 0.9081401850368371, 'eval_recall': 0.908421052631579, 'eval_runtime': 98.9997, 'eval_samples_per_second': 76.768, 'epoch': 0.72, 'run': 'pretrained/bert-base-uncased-ag_news-ORIG+TextMix', 'test': 'ORIG'}


In [None]:
df = pd.DataFrame(results)
df

In [None]:
df.to_csv('train_AG_NEWS_r2.csv')

In [None]:
df.to_clipboard(excel=True)