<a href="https://colab.research.google.com/github/arghyadeep99/Multi-label-Emotion-Classification-using-PyTorch-and-W-B/blob/main/Multi-label%20Emotion%20Classification%20with%20Pytorch%2C%20transformers%20and%20W%26B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip3 install datasets transformers -q

[K     |████████████████████████████████| 264 kB 6.5 MB/s 
[K     |████████████████████████████████| 2.6 MB 12.9 MB/s 
[K     |████████████████████████████████| 243 kB 60.7 MB/s 
[K     |████████████████████████████████| 118 kB 55.2 MB/s 
[K     |████████████████████████████████| 43 kB 2.6 MB/s 
[K     |████████████████████████████████| 3.3 MB 60.2 MB/s 
[K     |████████████████████████████████| 636 kB 57.3 MB/s 
[K     |████████████████████████████████| 895 kB 57.4 MB/s 
[?25h

In [None]:
!pip3 install wandb --upgrade -q

[K     |████████████████████████████████| 1.6 MB 8.4 MB/s 
[K     |████████████████████████████████| 133 kB 61.5 MB/s 
[K     |████████████████████████████████| 170 kB 55.1 MB/s 
[K     |████████████████████████████████| 97 kB 8.7 MB/s 
[K     |████████████████████████████████| 63 kB 2.2 MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm

from datasets import load_dataset
import random
from sklearn import metrics, model_selection, preprocessing
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import transformers
from transformers import AdamW, get_linear_schedule_with_warmup

In [None]:
def seed_everything(seed=73):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices) and configures cuDNN so that it does not introduce
    run-to-run variation.

    Args:
        seed: Integer seed shared by all generators. Defaults to 73.
    """
    random.seed(seed)
    # Only picked up by interpreters started after this point; the hash seed
    # of the already-running process cannot be changed at runtime.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # some cudnn methods can be random even after fixing the seed unless you tell it to be deterministic
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select a different algorithm on each run, so
    # pin it off explicitly instead of relying on whatever was set before.
    torch.backends.cudnn.benchmark = False

seed_everything(1234)

In [None]:
import wandb

wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Weights & Biases sweep definition: which hyper-parameters to vary and which
# logged metric the sweep should optimise.
sweep_config = {
    'method': 'random', # search strategy; W&B accepts 'grid', 'random' or 'bayes'
    'metric': {
      # Must match the key passed to wandb.log() inside trainer().
      'name': 'auc_score',
      'goal': 'maximize'   
    },
    'parameters': {

        # 'values' entries are sampled by the sweep; 'value' entries are fixed.
        'learning_rate': {
            'values': [5e-5, 3e-5]
        },
        'batch_size': {
            'values': [32, 64]
        },
        'epochs':{'value': 10},
        'dropout':{
            'values': [0.3, 0.4, 0.5]
        },
        'tokenizer_max_len': {'value': 40},
    }
}

# Fallback hyper-parameters with the same keys as the sweep parameters.
# NOTE(review): not referenced by any of the visible cells — confirm whether it
# was meant to be passed to trainer()/wandb.init().
sweep_defaults = {
    'learning_rate': 3e-5,
    'batch_size': 64,
    'epochs': 10,
    'dropout': 0.3,
    'tokenizer_max_len': 40
}

# Registers the sweep under the 'bhaavnaye' project; the returned id is later
# handed to wandb.agent().
sweep_id = wandb.sweep(sweep_config, project='bhaavnaye')

In [None]:
# Download (or reuse the cached copy of) the GoEmotions dataset. No config name
# is given, so the loader falls back to the "simplified" configuration.
go_emotions = load_dataset("go_emotions")
# `.data` is indexed by split name below and each entry is converted with
# `.to_pandas()`.
data = go_emotions.data

Downloading:   0%|          | 0.00/2.02k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.67k [00:00<?, ?B/s]

No config specified, defaulting to: go_emotions/simplified


Downloading and preparing dataset go_emotions/simplified (download: 4.19 MiB, generated: 5.03 MiB, post-processed: Unknown size, total: 9.22 MiB) to /root/.cache/huggingface/datasets/go_emotions/simplified/0.0.0/2637cfdd4e64d30249c3ed2150fa2b9d279766bfcd6a809b9f085c61a90d776d...


Downloading:   0%|          | 0.00/1.61M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/203k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/201k [00:00<?, ?B/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset go_emotions downloaded and prepared to /root/.cache/huggingface/datasets/go_emotions/simplified/0.0.0/2637cfdd4e64d30249c3ed2150fa2b9d279766bfcd6a809b9f085c61a90d776d. Subsequent calls will reuse this data.


In [None]:
# Materialise the three predefined splits as pandas DataFrames.
train, valid, test = data["train"].to_pandas(), data["validation"].to_pandas(), data["test"].to_pandas()

In [None]:
print(train.shape, valid.shape, test.shape) 

(43410, 3) (5426, 3) (5427, 3)


In [None]:
train.head()

Unnamed: 0,text,labels,id
0,My favourite food is anything I didn't have to...,[27],eebbqej
1,"Now if he does off himself, everyone will thin...",[27],ed00q6i
2,WHY THE FUCK IS BAYLESS ISOING,[2],eezlygj
3,To make her feel threatened,[14],ed7ypvh
4,Dirty Southern Wankers,[3],ed0bdzj


In [None]:
# Integer label id -> human-readable emotion name.
# NOTE(review): presumably mirrors the label order of the GoEmotions
# "simplified" configuration — verify against the dataset's own label names.
mapping = {
    0:"admiration",
    1:"amusement",
    2:"anger",
    3:"annoyance",
    4:"approval",
    5:"caring",
    6:"confusion",
    7:"curiosity",
    8:"desire",
    9:"disappointment",
    10:"disapproval",
    11:"disgust",
    12:"embarrassment",
    13:"excitement",
    14:"fear",
    15:"gratitude",
    16:"grief",
    17:"joy",
    18:"love",
    19:"nervousness",
    20:"optimism",
    21:"pride",
    22:"realization",
    23:"relief",
    24:"remorse",
    25:"sadness",
    26:"surprise",
    27:"neutral",
}

# Number of classes; used as the width of the one-hot vectors and of the
# classifier output layer.
n_labels = len(mapping)

In [None]:
def one_hot_encoder(df):
    """Turn the per-row label-id lists of ``df`` into a multi-hot DataFrame.

    Args:
        df: DataFrame with a ``"labels"`` column whose cells are iterables of
            integer label ids in ``[0, n_labels)``.

    Returns:
        A new DataFrame with one row per row of ``df`` (default integer
        index) and ``n_labels`` integer-named columns holding 0/1 flags.
    """
    one_hot_encoding = []
    # Iterate the column directly: `df.iloc[i]["labels"]` would build a whole
    # row Series on every iteration just to read a single cell.
    for label_indices in tqdm(df["labels"], total=len(df)):
        row = [0] * n_labels
        for index in label_indices:
            row[index] = 1
        one_hot_encoding.append(row)
    return pd.DataFrame(one_hot_encoding)

In [None]:
# Build one multi-hot label frame per split (one column per emotion id).
train_ohe_labels = one_hot_encoder(train)
valid_ohe_labels = one_hot_encoder(valid)
test_ohe_labels = one_hot_encoder(test)

  0%|          | 0/43410 [00:00<?, ?it/s]

  0%|          | 0/5426 [00:00<?, ?it/s]

  0%|          | 0/5427 [00:00<?, ?it/s]

In [None]:
train_ohe_labels.shape

(43410, 28)

In [None]:
# Append the multi-hot columns to each split. axis=1 concatenation aligns on
# the row index — assumes both frames carry the same default index; TODO confirm.
# NOTE(review): rebinding `train`/`valid`/`test` makes this cell non-idempotent;
# running it twice appends the label columns a second time.
train = pd.concat([train, train_ohe_labels], axis=1)
valid = pd.concat([valid, valid_ohe_labels], axis=1)
test = pd.concat([test, test_ohe_labels], axis=1)

In [None]:
train.head()

Unnamed: 0,text,labels,id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27
0,My favourite food is anything I didn't have to...,[27],eebbqej,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,"Now if he does off himself, everyone will thin...",[27],ed00q6i,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,WHY THE FUCK IS BAYLESS ISOING,[2],eezlygj,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,To make her feel threatened,[14],ed7ypvh,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Dirty Southern Wankers,[3],ed0bdzj,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
def inspect_category_wise_data(label, n=5):
    """Print ``n`` randomly sampled training texts tagged with ``label``.

    Args:
        label: Integer label id; used both as a one-hot column name of the
            global ``train`` frame and as a key of the global ``mapping``.
        n: Number of rows to sample. Defaults to 5.
    """
    has_label = train[label] == 1
    picked = train[has_label].sample(n)
    emotion_name = mapping[label]

    header = f"{n} samples from {emotion_name} sentiment: \n"
    print(header)
    for sample_text in picked["text"]:
        print(sample_text, end='\n\n')

In [None]:
inspect_category_wise_data(4)

5 samples from approval sentiment: 

Yeah dude I like the brown sugar ones a lot, what’s the problem?

Yes. Combined.

Completely understandable. Maybe mom should book flights a little bit farther in advance next time though!

Doesn't mean any of those thing you liked are going to change. Not immediately anyways.

Ah ok! I figured I was getting it wrong.



In [None]:
class GoEmotionDataset(Dataset):
    """Map-style dataset pairing raw texts with their multi-hot label vectors.

    Each item is tokenized on access, padded/truncated to ``max_len`` tokens
    and returned as a dict of ``torch.long`` tensors.

    Args:
        texts: Indexable sequence of input strings.
        labels: Indexable sequence of label vectors, parallel to ``texts``.
        tokenizer: Callable returning a mapping with ``"input_ids"`` and
            ``"attention_mask"`` entries (e.g. a Hugging Face tokenizer).
        max_len: Fixed sequence length passed to the tokenizer.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels

        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, index):
        """Tokenize example ``index`` and return its ``ids``/``mask``/``labels`` tensors."""
        text = self.texts[index]
        label = self.labels[index]

        # Call the tokenizer object directly rather than through `__call__`;
        # no second sentence is supplied, so the text-pair argument is omitted.
        inputs = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
        )

        return {
            "ids": torch.tensor(inputs["input_ids"], dtype=torch.long),
            "mask": torch.tensor(inputs["attention_mask"], dtype=torch.long),
            # Kept as long here; the training/eval loops cast to float for the loss.
            "labels": torch.tensor(label, dtype=torch.long),
        }

In [None]:
class GoEmotionClassifier(nn.Module):
    """Transformer encoder followed by dropout and a linear multi-label head.

    Args:
        n_train_steps: Total number of optimisation steps; only stored on the
            instance.
        n_classes: Number of output logits (one per label).
        do_prob: Dropout probability applied to the pooled encoder output.
        bert_model: Encoder module whose forward pass returns a mapping with
            a ``"pooler_output"`` entry.
    """

    def __init__(self, n_train_steps, n_classes, do_prob, bert_model):
        super().__init__()
        self.bert = bert_model
        self.dropout = nn.Dropout(do_prob)
        # Size the head from the encoder's own configuration so encoders with
        # another hidden width work too; fall back to the previous constant
        # (768) when the encoder exposes no `config.hidden_size`.
        encoder_config = getattr(bert_model, "config", None)
        hidden_size = getattr(encoder_config, "hidden_size", 768)
        self.out = nn.Linear(hidden_size, n_classes)
        self.n_train_steps = n_train_steps
        self.step_scheduler_after = "batch"

    def forward(self, ids, mask):
        """Return raw (pre-sigmoid) logits for a batch of token ids and attention masks."""
        pooled = self.bert(ids, attention_mask=mask)["pooler_output"]
        return self.out(self.dropout(pooled))

In [None]:
# Tokenizer for the pretrained SqueezeBERT checkpoint; created once at module
# level and read as a global by build_dataset().
tokenizer = transformers.SqueezeBertTokenizer.from_pretrained("squeezebert/squeezebert-uncased", do_lower_case=True)

def build_dataset(tokenizer_max_len):
    """Wrap the global ``train`` and ``valid`` frames as ``GoEmotionDataset`` objects.

    Args:
        tokenizer_max_len: Sequence length every example is padded/truncated to.

    Returns:
        Tuple ``(train_dataset, valid_dataset)``.
    """
    label_columns = range(n_labels)

    def _as_dataset(frame):
        # Texts and their multi-hot label rows are both passed as plain lists.
        return GoEmotionDataset(
            frame.text.tolist(),
            frame[label_columns].values.tolist(),
            tokenizer,
            tokenizer_max_len,
        )

    return _as_dataset(train), _as_dataset(valid)

def build_dataloader(train_dataset, valid_dataset, batch_size):
    """Create the training and validation ``DataLoader`` objects.

    Args:
        train_dataset: Dataset iterated in shuffled order.
        valid_dataset: Dataset iterated in its stored order.
        batch_size: Number of examples per batch for both loaders.

    Returns:
        Tuple ``(train_data_loader, valid_data_loader)``.
    """
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    # Evaluation does not benefit from shuffling; a fixed order keeps the
    # validation pass deterministic and leaves the global RNG state untouched.
    valid_data_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=1)

    return train_data_loader, valid_data_loader

def ret_model(n_train_steps, do_prob):
    """Build a fresh ``GoEmotionClassifier`` on top of the pretrained encoder.

    Args:
        n_train_steps: Total number of optimisation steps, forwarded to the
            classifier.
        do_prob: Dropout probability of the classification head.

    Returns:
        A new ``GoEmotionClassifier`` with ``n_labels`` outputs.
    """
    import copy

    # Every sweep run must start from the pretrained weights. Wrapping the
    # global `bert_model` directly would let each run keep fine-tuning the
    # very same encoder object, leaking weights from one run into the next.
    encoder = copy.deepcopy(bert_model)
    return GoEmotionClassifier(n_train_steps, n_labels, do_prob, bert_model=encoder)

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/500 [00:00<?, ?B/s]

In [None]:
sample_train_dataset, _ = build_dataset(40)
print(sample_train_dataset[0])
len(sample_train_dataset)

{'ids': tensor([ 101, 2026, 8837, 2833, 2003, 2505, 1045, 2134, 1005, 1056, 2031, 2000,
        5660, 2870, 1012,  102,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0]), 'mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'labels': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1])}


43410

In [None]:
# Pretrained SqueezeBERT encoder, loaded once at module level; ret_model()
# reads this global when it builds a classifier.
bert_model = transformers.SqueezeBertModel.from_pretrained("squeezebert/squeezebert-uncased")

Downloading:   0%|          | 0.00/103M [00:00<?, ?B/s]

Some weights of the model checkpoint at squeezebert/squeezebert-uncased were not used when initializing SqueezeBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing SqueezeBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SqueezeBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
def ret_optimizer(model):
    '''
    Build an AdamW optimizer with two parameter groups: weight decay 0.001
    for regular weights and no weight decay for biases and LayerNorm weights.
    The learning rate is read from ``wandb.config.learning_rate``.

    Taken from Abhishek Thakur's Tez library example: 
    https://github.com/abhishekkrthakur/tez/blob/main/examples/text_classification/binary.py

    Args:
        model: Module whose ``named_parameters()`` are optimised.

    Returns:
        The configured ``AdamW`` instance.
    '''
    # The previous exclusion list was ["bias", "LayerNorm.bias"]; its second
    # entry is already covered by "bias", so LayerNorm weights were decayed.
    # Matching is case-insensitive so it does not depend on how an encoder
    # capitalises its LayerNorm attribute names.
    no_decay = ("bias", "layernorm.weight")

    decayed, not_decayed = [], []
    for name, param in model.named_parameters():
        lowered = name.lower()
        if any(marker in lowered for marker in no_decay):
            not_decayed.append(param)
        else:
            decayed.append(param)

    optimizer_parameters = [
        {"params": decayed, "weight_decay": 0.001},
        {"params": not_decayed, "weight_decay": 0.0},
    ]
    opt = AdamW(optimizer_parameters, lr=wandb.config.learning_rate)
    return opt

def ret_scheduler(optimizer, num_train_steps):
    """Return a linear learning-rate schedule without warm-up.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        num_train_steps: Total number of scheduler steps.
    """
    return get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_train_steps,
    )

def loss_fn(outputs, labels):
    """Binary cross-entropy on raw logits for multi-label targets.

    Args:
        outputs: Logits produced by the model.
        labels: Targets with the same shape as ``outputs`` (cast to float
            here), or ``None``.

    Returns:
        The loss tensor, or ``None`` when ``labels`` is ``None``.
    """
    if labels is not None:
        criterion = nn.BCEWithLogitsLoss()
        float_targets = labels.float()
        return criterion(outputs, float_targets)
    return None

def log_metrics(preds, labels):
    """Compute the micro-averaged ROC AUC over all (example, label) pairs.

    Args:
        preds: List of per-example probability tensors.
        labels: List of per-example target tensors, parallel to ``preds``.

    Returns:
        Dict with a single key ``"auc_micro"``.
    """
    pred_matrix = torch.stack(preds).cpu().detach().numpy()
    label_matrix = torch.stack(labels).cpu().detach().numpy()

    # Micro-averaging: flatten both matrices so every (example, label) cell
    # counts as one binary decision.
    fpr_micro, tpr_micro, _ = metrics.roc_curve(label_matrix.ravel(), pred_matrix.ravel())

    auc_micro = metrics.auc(fpr_micro, tpr_micro)
    return {"auc_micro": auc_micro}

In [None]:
def train_fn(data_loader, model, optimizer, device, scheduler):
    '''
        Run one training epoch and return the sum of the per-batch losses
        (not the average). The scheduler is stepped after every batch.

        Modified from Abhishek Thakur's BERT example: 
        https://github.com/abhishekkrthakur/bert-sentiment/blob/master/src/engine.py
    '''

    model.train()
    running_loss = 0.0
    for _, batch in tqdm(enumerate(data_loader), total=len(data_loader)):
        # Move the batch to the target device with the dtypes the model/loss expect.
        ids = batch["ids"].to(device, dtype=torch.long)
        mask = batch["mask"].to(device, dtype=torch.long)
        targets = batch["labels"].to(device, dtype=torch.float)

        optimizer.zero_grad()
        logits = model(ids=ids, mask=mask)

        batch_loss = loss_fn(logits, targets)
        batch_loss.backward()
        running_loss += batch_loss.item()
        optimizer.step()
        scheduler.step()
    return running_loss
    

def eval_fn(data_loader, model, device):
    '''
        Evaluate the model without gradient tracking. Returns the summed
        batch loss, a list of sigmoid outputs and a list of targets; both
        lists hold one tensor per row of each batch.

        Modified from Abhishek Thakur's BERT example: 
        https://github.com/abhishekkrthakur/bert-sentiment/blob/master/src/engine.py
    '''
    model.eval()
    total_loss = 0.0
    collected_outputs, collected_targets = [], []
    with torch.no_grad():
        for _, batch in tqdm(enumerate(data_loader), total=len(data_loader)):
            ids = batch["ids"].to(device, dtype=torch.long)
            mask = batch["mask"].to(device, dtype=torch.long)
            targets = batch["labels"].to(device, dtype=torch.float)

            logits = model(ids=ids, mask=mask)
            total_loss += loss_fn(logits, targets).item()
            # extend() on a tensor iterates its first dimension.
            collected_targets.extend(targets)
            collected_outputs.extend(torch.sigmoid(logits))
    return total_loss, collected_outputs, collected_targets

In [None]:
def trainer(config=None):
    """Run one complete training job inside a W&B run (used as the sweep target).

    Builds datasets, loaders, model, optimizer and scheduler from the run
    configuration, trains for ``config.epochs`` epochs, logs per-epoch metrics
    to W&B and writes ``./best_model.pt`` whenever the average validation loss
    improves.

    Args:
        config: Optional configuration forwarded to ``wandb.init``; inside the
            run the effective values are read from ``wandb.config``.
    """
    with wandb.init(config=config):
        config = wandb.config

        train_dataset, valid_dataset = build_dataset(config.tokenizer_max_len)
        train_data_loader, valid_data_loader = build_dataloader(train_dataset, valid_dataset, config.batch_size)
        print("Length of Train Dataloader: ", len(train_data_loader))
        print("Length of Valid Dataloader: ", len(valid_data_loader))

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        n_epochs = config.epochs
        # The scheduler is stepped once per batch, so its horizon is the exact
        # number of batches per epoch (including the last partial one) times
        # the configured number of epochs — not a hard-coded 10.
        n_train_steps = len(train_data_loader) * n_epochs

        model = ret_model(n_train_steps, config.dropout)
        optimizer = ret_optimizer(model)
        scheduler = ret_scheduler(optimizer, n_train_steps)
        model.to(device)
        model = nn.DataParallel(model)
        wandb.watch(model)

        # Start from +inf so the first epoch always counts as an improvement,
        # whatever the scale of the loss.
        best_val_loss = float("inf")
        for epoch in tqdm(range(n_epochs)):
            train_loss = train_fn(train_data_loader, model, optimizer, device, scheduler)
            eval_loss, preds, labels = eval_fn(valid_data_loader, model, device)

            auc_score = log_metrics(preds, labels)["auc_micro"]
            print("AUC score: ", auc_score)
            # train_fn/eval_fn return summed batch losses; convert to per-batch averages.
            avg_train_loss, avg_val_loss = train_loss / len(train_data_loader), eval_loss / len(valid_data_loader)
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": avg_train_loss,
                "val_loss": avg_val_loss,
                "auc_score": auc_score,
            })
            print("Average Train loss: ", avg_train_loss)
            print("Average Valid loss: ", avg_val_loss)

            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                # NOTE(review): this is the state dict of the DataParallel
                # wrapper, so its keys carry a "module." prefix; the path is
                # fixed, so a later run overwrites an earlier run's checkpoint.
                torch.save(model.state_dict(), "./best_model.pt")
                print("Model saved as current val_loss is: ", best_val_loss)

In [None]:
# Launch the sweep agent: calls trainer() for up to 6 runs, each with a
# hyper-parameter combination chosen by the sweep.
wandb.agent(sweep_id, function=trainer, count=6)

[34m[1mwandb[0m: Agent Starting Run: jz04gyb1 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	tokenizer_max_len: 40
[34m[1mwandb[0m: Currently logged in as: [33marghyadeep[0m (use `wandb login --relogin` to force relogin)


Length of Train Dataloader:  1357
Length of Valid Dataloader:  170


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9306202349042465
Average Train loss:  0.14250454368095847
Average Valid loss:  0.09914311771007145
Model saved as current val_loss is:  0.09914311771007145


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9458977154591968
Average Train loss:  0.0956896783848545
Average Valid loss:  0.08925008061615859
Model saved as current val_loss is:  0.08925008061615859


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9512923916211422
Average Train loss:  0.08496551455033555
Average Valid loss:  0.08734406754374505
Model saved as current val_loss is:  0.08734406754374505


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9511434560622385
Average Train loss:  0.07764908030091092
Average Valid loss:  0.08697797165197485
Model saved as current val_loss is:  0.08697797165197485


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9518852585489684
Average Train loss:  0.07088046431179071
Average Valid loss:  0.08816209547221661


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9508637079986454
Average Train loss:  0.06494349605000976
Average Valid loss:  0.08945691228351173


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9494710036732434
Average Train loss:  0.05953421627494589
Average Valid loss:  0.09201766182394588


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9475119870181683
Average Train loss:  0.05510777140732143
Average Valid loss:  0.09511752363075228


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9467232712464999
Average Train loss:  0.051605246812873845
Average Valid loss:  0.09616154662388213


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9458711490152061
Average Train loss:  0.04910355681716825
Average Valid loss:  0.09729742048417821


VBox(children=(Label(value=' 0.45MB of 0.45MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,10.0
train_loss,0.0491
val_loss,0.0973
auc_score,0.94587
_runtime,2096.0
_timestamp,1629176960.0
_step,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▄▃▃▂▂▁▁▁
val_loss,█▂▁▁▂▂▄▆▆▇
auc_score,▁▆████▇▇▆▆
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: yas1zkxs with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	tokenizer_max_len: 40


Length of Train Dataloader:  679
Length of Valid Dataloader:  85


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9029218371592804
Average Train loss:  0.1695271676326711
Average Valid loss:  0.11347967877107508
Model saved as current val_loss is:  0.11347967877107508


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9242215868761217
Average Train loss:  0.08457200121085141
Average Valid loss:  0.10138595411006142
Model saved as current val_loss is:  0.10138595411006142


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9248970171363176
Average Train loss:  0.06586780406216458
Average Valid loss:  0.0996117041391485
Model saved as current val_loss is:  0.0996117041391485


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9261515801528553
Average Train loss:  0.05676024977737744
Average Valid loss:  0.10119513790397083


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9265392637170273
Average Train loss:  0.05092326529730051
Average Valid loss:  0.1038422604694086


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9263487374232745
Average Train loss:  0.04729195896519652
Average Valid loss:  0.1058826537693248


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9265912712504629
Average Train loss:  0.044491316643111484
Average Valid loss:  0.1079144653152017


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9266883985013767
Average Train loss:  0.04212870587634584
Average Valid loss:  0.10942283737308839


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9258647095103465
Average Train loss:  0.040781386106087114
Average Valid loss:  0.11052591064397027


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9259468361620762
Average Train loss:  0.039597219610600476
Average Valid loss:  0.1106320470571518


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,10.0
train_loss,0.0396
val_loss,0.11063
auc_score,0.92595
_runtime,1798.0
_timestamp,1629178773.0
_step,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▂▁▂▃▄▅▆▇▇
auc_score,▁▇▇███████
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: jzx67hyt with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	tokenizer_max_len: 40


Length of Train Dataloader:  1357
Length of Valid Dataloader:  170


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9191280620520281
Average Train loss:  0.10650615153906973
Average Valid loss:  0.10157510776291875
Model saved as current val_loss is:  0.10157510776291875


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9238466725861392
Average Train loss:  0.05424602519701576
Average Valid loss:  0.10840069066513987


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9242227946130818
Average Train loss:  0.0452103583154515
Average Valid loss:  0.11402925055693178


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9281234796944687
Average Train loss:  0.039054059653420084
Average Valid loss:  0.11835276725537637


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9268495891174403
Average Train loss:  0.034555315046228725
Average Valid loss:  0.12257486362667645


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9247172958615468
Average Train loss:  0.030783972927343785
Average Valid loss:  0.12589872492586865


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9229509253666588
Average Train loss:  0.02734811708822506
Average Valid loss:  0.1303741236381671


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9236134355874626
Average Train loss:  0.0246552791828722
Average Valid loss:  0.13162133785731653


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9236961550695058
Average Train loss:  0.022561630240299065
Average Valid loss:  0.13365529635373283


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9228078179597194
Average Train loss:  0.020777128055532494
Average Valid loss:  0.13506130418356727


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,10.0
train_loss,0.02078
val_loss,0.13506
auc_score,0.92281
_runtime,2089.0
_timestamp,1629180877.0
_step,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▃▂▂▂▂▁▁▁
val_loss,▁▂▄▅▅▆▇▇██
auc_score,▁▅▅█▇▅▄▄▅▄
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 5pw2k7j1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	tokenizer_max_len: 40


Length of Train Dataloader:  679
Length of Valid Dataloader:  85


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.8925335891948045
Average Train loss:  0.15700088847916854
Average Valid loss:  0.11317366510629653
Model saved as current val_loss is:  0.11317366510629653


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.8993353607591605
Average Train loss:  0.059337010906134156
Average Valid loss:  0.10930232756278094
Model saved as current val_loss is:  0.10930232756278094


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9008505456580354
Average Train loss:  0.03983743235589566
Average Valid loss:  0.11483153692063164


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9054176867009498
Average Train loss:  0.03178092149060236
Average Valid loss:  0.11776700195144205


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9047773998094156
Average Train loss:  0.02717380997325693
Average Valid loss:  0.12415659059496487


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9077425367348031
Average Train loss:  0.024205066236243508
Average Valid loss:  0.12640349610763438


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9050390070500393
Average Train loss:  0.022056856082919685
Average Valid loss:  0.130409452494453


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9058358000674221
Average Train loss:  0.02024149483368788
Average Valid loss:  0.13323394410750444


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9042500500540471
Average Train loss:  0.019211490694104265
Average Valid loss:  0.13494542933562223


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9042720956957568
Average Train loss:  0.018517421069520694
Average Valid loss:  0.13587398634237402


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,10.0
train_loss,0.01852
val_loss,0.13587
auc_score,0.90427
_runtime,1803.0
_timestamp,1629182695.0
_step,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▂▂▁▁▁▁▁▁
val_loss,▂▁▂▃▅▆▇▇██
auc_score,▁▄▅▇▇█▇▇▆▆
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: c66ub7l4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	tokenizer_max_len: 40


Length of Train Dataloader:  679
Length of Valid Dataloader:  85


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.8861090192439289
Average Train loss:  0.11559864736007135
Average Valid loss:  0.1116446160218295
Model saved as current val_loss is:  0.1116446160218295


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9004210252886659
Average Train loss:  0.03926441041047689
Average Valid loss:  0.12179267634363736


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9060714735394577
Average Train loss:  0.028834880433910814
Average Valid loss:  0.1264783740920179


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9029705682418012
Average Train loss:  0.023656399506356826
Average Valid loss:  0.1337036891018643


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9021206116449492
Average Train loss:  0.020382753747552412
Average Valid loss:  0.13872094364727244


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.8986403907902966
Average Train loss:  0.01789930674270761
Average Valid loss:  0.14540068892871633


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.8986397274234061
Average Train loss:  0.01569068189765243
Average Valid loss:  0.14760821829823886


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.9010923881190819
Average Train loss:  0.014112801951478305
Average Valid loss:  0.15052380132324555


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.8996455300098998
Average Train loss:  0.013048751400756226
Average Valid loss:  0.15268301657017538


  0%|          | 0/679 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

AUC score:  0.8994679792155953
Average Train loss:  0.01227656977809417
Average Valid loss:  0.15368895329096738


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,10.0
train_loss,0.01228
val_loss,0.15369
auc_score,0.89947
_runtime,1808.0
_timestamp,1629184518.0
_step,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,▁▃▃▅▆▇▇▇██
auc_score,▁▆█▇▇▅▅▆▆▆
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█


[34m[1mwandb[0m: Agent Starting Run: 02xdfdcj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	tokenizer_max_len: 40


Length of Train Dataloader:  1357
Length of Valid Dataloader:  170


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.8993423040351746
Average Train loss:  0.07977044348609781
Average Valid loss:  0.11903628268662621
Model saved as current val_loss is:  0.11903628268662621


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.8977751471403761
Average Train loss:  0.02749540036518124
Average Valid loss:  0.13327915322254685


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9092206646238412
Average Train loss:  0.021541224361940198
Average Valid loss:  0.14193086080691394


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.8999627340726686
Average Train loss:  0.017799338120136714
Average Valid loss:  0.1511405052507625


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9014540081489022
Average Train loss:  0.014967474784817362
Average Valid loss:  0.15675339229843196


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9029768783039656
Average Train loss:  0.012809264032662948
Average Valid loss:  0.15845848733887952


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.8976236071126025
Average Train loss:  0.010875756537741463
Average Valid loss:  0.16725738565711415


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.9005611496983876
Average Train loss:  0.009219760536068643
Average Valid loss:  0.17009967955596306


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.8985196887077639
Average Train loss:  0.008045877284537137
Average Valid loss:  0.17286845257177072


  0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

AUC score:  0.8991755361835193
Average Train loss:  0.007275935653123563
Average Valid loss:  0.1723105186048676


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,10.0
train_loss,0.00728
val_loss,0.17231
auc_score,0.89918
_runtime,2128.0
_timestamp,1629186661.0
_step,9.0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▂▂▂▂▁▁▁▁
val_loss,▁▃▄▅▆▆▇███
auc_score,▂▁█▂▃▄▁▃▂▂
_runtime,▁▂▃▃▄▅▆▆▇█
_timestamp,▁▂▃▃▄▅▆▆▇█
_step,▁▂▃▃▄▅▆▆▇█
