In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from transformers import pipeline, AutoTokenizer, AutoModel,AutoConfig,get_constant_schedule_with_warmup, BertForSequenceClassification, BertTokenizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score,f1_score,average_precision_score
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler, RandomSampler

import torch
from torch import nn
import time
import random
import datetime
import math

from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split


tqdm.pandas()

In [2]:
class EarlyStopper:
    def __init__(self, patience=1, min_delta=0,is_loss=True):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.is_loss = is_loss
        if is_loss:
            self.min_validation_loss = np.inf
        else:
            self.min_validation_loss = -np.inf

    def early_stop(self, validation_loss,save_best=False,model=None,model_path="temp"):
        if self.is_loss:
            if validation_loss < self.min_validation_loss:
                self.min_validation_loss = validation_loss
                self.counter = 0
                print("NEW LOWEST LOSS ",self.min_validation_loss)
                if save_best:
                    torch.save(model.state_dict(), model_path)
            elif validation_loss >= (self.min_validation_loss + self.min_delta):
                self.counter += 1
                if self.counter >= self.patience:
                    return True
            return False
        
        else:
            if validation_loss > self.min_validation_loss:
                self.min_validation_loss = validation_loss
                self.counter = 0
                print("NEW HIGHEST SCORE ",self.min_validation_loss)
                if save_best:
                    torch.save(model.state_dict(), model_path)
            elif validation_loss <= (self.min_validation_loss + self.min_delta):
                self.counter += 1
                if self.counter >= self.patience:
                    return True
            return False

In [3]:
def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [4]:
# dessa vez sem fixar a seed, exceto para a divisão dos dados
seed_val = 2023
if torch.cuda.is_available():
  device = 0

In [5]:
max_length = 128


remove_neutral_instances = False


learning_rate =  5e-5
epsilon = 1e-8
num_train_epochs = 15
multi_gpu = True
filter_emotions = False

apply_scheduler = True
warmup_proportion = 0.1

T = 8

print_each_n_step = 100

batch_size = 32

model_path = "neuralmind/bert-base-portuguese-cased"
#model_path = "models/go_emotions"

alpha = 0.5


In [6]:
df = pd.read_csv("../features/both tasks facebook + reddit.csv",index_col = 0).reset_index(drop=True)


In [7]:
emotion_dict = {
    "admiração": 0,
    "diversão": 1,
    "raiva": 2,
    "aborrecimento": 3,
    "aprovação": 4,
    "zelo": 5,
    "confusão": 6,
    "curiosidade": 7,
    "desejo": 8,
    "decepção": 9,
    "desaprovação": 10,
    "nojo": 11,
    "constrangimento": 12,
    "entusiasmo": 13,
    "medo": 14,
    "gratidão": 15,
    "luto": 16,
    "alegria": 17,
    "amor": 18,
    "nervosismo": 19,
    "otimismo": 20,
    "orgulho": 21,
    "percepção": 22,
    "alívio": 23,
    "remorso": 24,
    "tristeza": 25,
    "surpresa": 26,
    "neutro": 27
}

In [8]:
emotion_num = len(emotion_dict)

In [9]:
to_keep = ["neutro", "raiva", "zelo","confusão", "desejo","decepção","desaprovação","gratidão","luto","amor", "tristeza"]
#to_keep = ['medo', 'tristeza','raiva','zelo']

if filter_emotions:
    drop=  []
    for key in emotion_dict:
        if key in to_keep:
            drop.append(key)

    for key in drop:
        del emotion_dict[key]
    if filter_emotions:
        idx = [str(emotion_dict[key])+".1" if emotion_dict[key] < symptom_num else str(emotion_dict[key]) for key in emotion_dict ]
        train_df = train_df.drop(idx,axis=1)
    emotion_num = len(drop)   

In [10]:
df1 = pd.read_csv("../data/segredos_sentenças_multitask_clean.csv",index_col=0)

df1 = df1.drop(["Postagem com possível perfil depressivo","Alteração na eficiência",
         "Alteração da funcionalidade","*",'Agitação/inquietação','Sintoma obsessivo e compulsivo','Déficit de atenção/Memória',
              'Perda/Diminuição do prazer/ Perda/Diminuição da libido'],axis=1)

df1 = df1.reset_index(drop=True)
#df1 = df1[df1.iloc[:,1:].sum(axis=1) != 0]

#df1 = df1.reset_index(drop=True)
symptom_num = df1.iloc[:,1:].shape[1]

In [11]:
tokenizer = BertTokenizer.from_pretrained(model_path)

In [12]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

def build_dataset(df,df1):
    symptoms = df1.iloc[:,1:]
    strat = MultilabelStratifiedKFold(n_splits=2, shuffle=True, random_state=seed_val).split(df1,symptoms)
    for train_idx, test_idx in strat:

        symptoms_train = symptoms.loc[train_idx]
        symptoms_test  = symptoms.loc[test_idx]
        train_df = df.loc[train_idx]
        test_df = df.loc[test_idx]
        break
    remaining = df.loc[~df.index.isin(train_idx)]
    remaining = remaining.loc[~remaining.index.isin(test_idx)]

    train_remaining,test_remaining, _, _ = train_test_split(remaining,remaining,test_size=0.2,shuffle=True,random_state=seed_val)

    train_df = pd.concat([train_df,train_remaining]).reset_index(drop=True)
    test_df = pd.concat([test_df,test_remaining]).reset_index(drop=True)
    return train_df,test_df,symptoms_train,symptoms_test

In [13]:
max_length = 128

def get_dataloader(df,tokenizer,labels,batch_size,do_shuffle):
    examples = []
    input_ids = []
    attention_masks = []
    for text in df.text:
        encoded = tokenizer.encode_plus(text,return_attention_mask=True,add_special_tokens=True,max_length = max_length,
    padding="max_length",truncation=True)
        input_ids.append(encoded['input_ids'])
        attention_masks.append(encoded['attention_mask'])
    

    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)
    df = df.drop("text",axis=1)

    logits = torch.tensor(df.iloc[:,:symptom_num + emotion_num].to_numpy(),dtype=torch.float32)
    
    labels = torch.tensor(np.concatenate((labels.to_numpy(),torch.nn.functional.sigmoid(logits[labels.shape[0]:,:symptom_num]))),dtype=torch.float32)
    labeled = torch.tensor(np.concatenate((np.ones(labels.size()[0]), np.zeros(logits.size()[0] - labels.size()[0]))))
    # modificando logits
    #with torch.no_grad():
    #    logits[:labels.size()[0],:symptom_num] = (logits[:labels.size()[0],:symptom_num] + labels)/2
    
    
    dataset = torch.utils.data.TensorDataset(input_ids,attention_masks,logits,labeled,labels)
    
    if do_shuffle:
        sampler = torch.utils.data.RandomSampler
    else:
        sampler = torch.utils.data.SequentialSampler
    return torch.utils.data.DataLoader(dataset,sampler = sampler(dataset),batch_size=batch_size)

In [14]:
class Multitask(nn.Module):
    def __init__(self,num_primary_labels, num_auxiliary_labels,transformer,input_size=512):
        super(Multitask,self).__init__()
        self.transformer = transformer
        self.primary_head = nn.Linear(input_size,num_primary_labels)
        self.auxiliary_head = nn.Linear(input_size,num_auxiliary_labels)
        
        self.sigmoid = torch.sigmoid
        
    def forward(self,input_ids,input_mask):
        input_arr = transformer(input_ids,attention_mask=input_mask)[-1]
        primary_logits = self.primary_head(input_arr)
        auxiliary_logits = self.auxiliary_head(input_arr)
        
        
        return primary_logits,auxiliary_logits


# Test

In [15]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
symptoms = df1.iloc[:,1:]
strat = MultilabelStratifiedKFold(n_splits=2, shuffle=True, random_state=seed_val).split(df1,symptoms)

for train_idx, test_idx in strat:

    train_df = df1.loc[train_idx]
    test_df  = df1.loc[test_idx]
    break

In [16]:
test_labels = test_df.iloc[:,1:]
test_text = test_df.text

In [17]:
inputs = []
attention_masks =[]

for text in test_text:
    encoding = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=128,
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
    inputs.append(encoding['input_ids'])
    attention_masks.append(encoding['attention_mask'])

labels = torch.tensor(test_labels.to_numpy(),dtype=torch.float32)    
inputs = torch.stack(inputs)
attention_masks = torch.stack(attention_masks)

dataset = TensorDataset(inputs, attention_masks,labels)
symptom_dataloader = DataLoader(
              dataset,
              sampler = SequentialSampler(dataset), 
              batch_size = batch_size)

# Loss functions

In [18]:
def focal_loss(p,targets,gamma=3,alpha=0.8):

    bce = torch.nn.BCELoss()(p,targets)
    p = torch.where(targets == 1,p,1-p)
    alpha = targets * alpha + (1-targets) * (1-alpha)
    return (alpha * (1-p) ** gamma).mean()

In [19]:
def variance_loss(losses,var_logs):
    total_loss = 0
    for i,loss in enumerate(losses):
        total_loss += loss * torch.exp(-var_logs[i]) + var_logs[i]
    #total_loss += torch.prod(torch.tensor(var_logs))
    return total_loss

In [20]:
def distillation_loss(student,teacher,weights,b_labels,labeled_weight = 0.5):
    loss = torch.nn.MSELoss(reduction='none')
    student = torch.nn.functional.sigmoid(student)
    teacher = torch.nn.functional.sigmoid(teacher)
    
    if b_labels != None:
        teacher = (teacher * (labeled_weight) + b_labels * (1-labeled_weight))

    l = loss(student,teacher)
    
    
    #l = l + (l.T * (weights * labeled_weight)).T
    return l.mean()/student.size(0)

In [21]:
def classic_distill(student,teacher,weights,b_labels,T=3):
    loss = torch.nn.KLDivLoss(reduction="batchmean")
    student = torch.nn.functional.log_softmax(student/T)
    teacher = torch.nn.functional.softmax(teacher/T)
    return loss(student,teacher)

In [22]:
def naive_combination(p_loss,a_loss,alpha=0.5):
    return p_loss * alpha + a_loss * (1-alpha)

# Training loop

We have to try really hard to not overfit the model in order for label refinery to work. So I'm going to try:

* Higher dropout values
* Mixup
* Maybe START (?)


In [23]:
def train_model(model,train_dataloader,symptom_dataloader,primary_loss,auxiliary_loss):

    stop = EarlyStopper(is_loss = False,patience=4)

    training_stats = []

    total_t0 = time.time()

    log_var_a = torch.zeros((1,), requires_grad=True,device=device)
    log_var_b = torch.zeros((1,), requires_grad=True,device=device)

    log_vars = [log_var_a,log_var_b]
    model_vars = [i for i in model.parameters()]
    optimizer =  torch.optim.AdamW([{'params':model_vars},{'params':log_vars,'lr':0.001}], lr= learning_rate)



    primary_loss_func = primary_loss
    auxiliary_loss_func = auxiliary_loss
    multitask_loss_func = naive_combination

    sigmoid = torch.sigmoid
    best_f1 = 0
    best_epoch = 0
    best_report = {}
    best_auc = 0


    for epoch_i in range(0,num_train_epochs):

        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, num_train_epochs))
        print('Training...')
        t0 = time.time()

        tr_loss = 0
        pr_loss = 0
        aux_loss = 0

        model.train()
        all_probs = []
        all_labels_ids = []
        labeled_num = 0
        for step, batch in enumerate(train_dataloader):
            if step % print_each_n_step == 0 and not step == 0:
                # Calculate elapsed time in minutes.
                elapsed = format_time(time.time() - t0)

                # Report progress.
                #print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_logits = batch[2].to(device)
            b_labeled = batch[3].to(device)
            b_labels = batch[4].to(device)
            
            b_primary_logits = b_logits[:,:symptom_num]
            b_auxiliary_logits = b_logits[:,symptom_num:]

            cur_batch_size = b_input_ids.shape[0]


            prim_outputs, aux_outputs = model(b_input_ids,b_input_mask)


            #output,targets_a,targets_b,lam = mixup(output.logits,b_labels)


            temp_func_prim = lambda x,y : primary_loss_func(x,y,b_labeled,b_labels)
            temp_func_aux = lambda x,y : auxiliary_loss_func(x,y,torch.zeros_like(b_labeled),None)


            losses = [temp_func_prim(prim_outputs,b_primary_logits),temp_func_aux(aux_outputs,b_auxiliary_logits)]
            #batch_loss = multitask_loss_func(losses,variance_vars)
            batch_loss = multitask_loss_func(losses[0],losses[1])
            tr_loss += batch_loss.item()
            pr_loss += losses[0].item()
            aux_loss += losses[1].item()
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()



        avg_train_loss = tr_loss/len(train_dataloader)
        avg_prim_loss = pr_loss/len(train_dataloader)
        avg_aux_loss = aux_loss/len(train_dataloader)


        training_time = format_time(time.time() - t0)

        print("")
        print("  Average training loss: {0:.3f}".format(avg_train_loss))
        print("  Average primary loss: {0:.3f}".format(avg_prim_loss))
        print("  Average auxiliary loss: {0:.3f}".format(avg_aux_loss))

        print("  Training epoch took: {:}".format(training_time))
        print("  Labeled Num {:}".format(labeled_num))
        print("")
        print("Running Test...")

        model.eval()
        total_test_loss = 0

        all_probs = []
        all_labels_ids = []
        for batch in symptom_dataloader:

            b_input_ids = batch[0].reshape(len(batch[0]),max_length).to(device)
            b_input_mask = batch[1].reshape(len(batch[0]),max_length).to(device)
            b_labels = batch[2].to(device)
            cur_batch_size = b_input_ids.shape[0]


            with torch.no_grad():
                output,_ = model(b_input_ids,b_input_mask)
                preds = torch.nn.functional.sigmoid(output)
                loss = focal_loss(preds,b_labels)
                for el in preds:
                    all_probs.append(el.detach().cpu())
                for el in b_labels:
                    all_labels_ids.append(el.detach().cpu())


                total_test_loss += loss.item()

        all_probs = torch.stack(all_probs).numpy()
        all_labels_ids = torch.stack(all_labels_ids).numpy()
        cur_auc = average_precision_score(all_labels_ids,all_probs)

        print("auc:",cur_auc)    
        avg_test_loss = total_test_loss/len(symptom_dataloader)


        print("test Loss:", avg_test_loss)


        if stop.early_stop(cur_auc,save_best=True,model=model,model_path = "../models/temp"):
            break
        training_stats.append(
            {
                'epoch': epoch_i + 1,
                'Training Loss': avg_train_loss,
                'Test Loss': avg_test_loss
            })
        

In [24]:
def get_logits(df,model):
    inputs = df.text
    logits = []
    for i,example in enumerate(inputs):
    
        if i % 10000 == 0:
            print(f"{i} de {len(inputs)}")
        encoding = tokenizer.encode_plus(
            example,
            add_special_tokens=True,
            max_length=128,
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
            )
        with torch.no_grad():
            input_ids = encoding['input_ids'].to(device)
            mask = encoding['attention_mask'].to(device)
            outputs_prim,outputs_aux = model(input_ids,mask)
            outputs = torch.hstack([outputs_prim,outputs_aux])
            logit = outputs.squeeze(-2)
            del outputs
            del outputs_prim
            del outputs_aux
            del input_ids
            del mask
            
            logits.append(logit)
            del logit
    logits = np.array(torch.stack(logits).cpu())
    df = pd.DataFrame(logits).reset_index(drop=True)
    df['text'] = inputs.reset_index(drop=True)
    return df

In [25]:

from sklearn.metrics import precision_recall_curve,average_precision_score

stop_ext = EarlyStopper(is_loss = False,patience=4)
iterations = 20
for i in range(iterations):
    transformer = AutoModel.from_pretrained(model_path).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    config = AutoConfig.from_pretrained(model_path)
    hidden_size = int(config.hidden_size)
    model = Multitask(symptom_num,emotion_num,transformer,input_size = hidden_size)
    model = model.to(device)
    train_df,test_df,symptoms_train,symptoms_test = build_dataset(df,df1)
    train_dataloader = get_dataloader(train_df,tokenizer,symptoms_train,batch_size,do_shuffle=True)
    train_model(model,train_dataloader,symptom_dataloader,classic_distill,classic_distill)
    model.load_state_dict(torch.load("../models/temp"))
    
    probs = []
    for step,batch in enumerate(symptom_dataloader):
        model.eval()
        b_input_ids = batch[0].reshape(len(batch[0]),max_length).to(device)
        b_attention_mask = batch[1].reshape(len(batch[0]),max_length).to(device)


        with torch.no_grad():
            outputs, _ = model(b_input_ids,b_attention_mask)
            for b_prob in (torch.nn.functional.sigmoid(outputs).detach().cpu()):
                probs.append(b_prob)
    probs = torch.stack(probs).numpy()
    
    preds = []
    for i in range (probs.shape[1]):
        precision,recall,thresholds = precision_recall_curve(test_labels.iloc[:,i],probs[:,i])
        f1_scores = 2*recall*precision/(recall+precision)
        cur_threshold = thresholds[np.nanargmax(f1_scores)]
        print("melhor f1 para ",df.columns[i+1]," ",np.nanmax(f1_scores))
        preds.append(probs[:,i] >= cur_threshold)

    
    print("average precision score: ",average_precision_score(test_labels,probs))
    cur_auc = average_precision_score(test_labels,probs)
    if stop_ext.early_stop(cur_auc,save_best=True,model=model,model_path = "../models/temp2"):
        break
    df = get_logits(df,model)


    
    
    
    
    
    
    
    


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



Training...

  Average training loss: 0.001
  Average primary loss: 0.001
  Average auxiliary loss: 0.001
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.050167426414922535
test Loss: 0.011903780197875725
NEW HIGHEST SCORE  0.050167426414922535

Training...

  Average training loss: 0.000
  Average primary loss: 0.000
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.10010514582448783
test Loss: 0.010084602616828036
NEW HIGHEST SCORE  0.10010514582448783

Training...

  Average training loss: 0.000
  Average primary loss: 0.000
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.10743382130448001
test Loss: 0.009975787000478926
NEW HIGHEST SCORE  0.10743382130448001

Training...

  Average training loss: 0.000
  Average primary loss: 0.000
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.14792573110644555
t

  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)


0 de 6568


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



Training...

  Average training loss: 0.001
  Average primary loss: 0.001
  Average auxiliary loss: 0.001
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.0613477837617251
test Loss: 0.012517206056194531
NEW HIGHEST SCORE  0.0613477837617251

Training...

  Average training loss: 0.000
  Average primary loss: 0.001
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.09130786432815573
test Loss: 0.01037988529817478
NEW HIGHEST SCORE  0.09130786432815573

Training...

  Average training loss: 0.000
  Average primary loss: 0.000
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.0941623031053843
test Loss: 0.010411896790075745
NEW HIGHEST SCORE  0.0941623031053843

Training...

  Average training loss: 0.000
  Average primary loss: 0.000
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.1339969310773229
test Loss

  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you exp


Training...

  Average training loss: 0.001
  Average primary loss: 0.001
  Average auxiliary loss: 0.001
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.06523629363771334
test Loss: 0.012211862504733977
NEW HIGHEST SCORE  0.06523629363771334

Training...

  Average training loss: 0.000
  Average primary loss: 0.001
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.07605707085192753
test Loss: 0.011118159063059735
NEW HIGHEST SCORE  0.07605707085192753

Training...

  Average training loss: 0.000
  Average primary loss: 0.001
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.07999431907272961
test Loss: 0.011034383407063983
NEW HIGHEST SCORE  0.07999431907272961

Training...

  Average training loss: 0.000
  Average primary loss: 0.001
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.08296923144900582
tes

  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing B


Training...

  Average training loss: 0.001
  Average primary loss: 0.001
  Average auxiliary loss: 0.001
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.058829625464973834
test Loss: 0.012898132340302942
NEW HIGHEST SCORE  0.058829625464973834

Training...

  Average training loss: 0.000
  Average primary loss: 0.001
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.0865428788094092
test Loss: 0.011507286753102735
NEW HIGHEST SCORE  0.0865428788094092

Training...

  Average training loss: 0.000
  Average primary loss: 0.001
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.08671426485410692
test Loss: 0.012909259609374646
NEW HIGHEST SCORE  0.08671426485410692

Training...

  Average training loss: 0.000
  Average primary loss: 0.001
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.08948654837338889
tes

  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
  f1_scores = 2*recall*precision/(recall+precision)
Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing B


Training...

  Average training loss: 0.001
  Average primary loss: 0.001
  Average auxiliary loss: 0.001
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.061598799115230946
test Loss: 0.012693031499086804
NEW HIGHEST SCORE  0.061598799115230946

Training...

  Average training loss: 0.001
  Average primary loss: 0.001
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.07878511019273275
test Loss: 0.012425855359037381
NEW HIGHEST SCORE  0.07878511019273275

Training...

  Average training loss: 0.000
  Average primary loss: 0.001
  Average auxiliary loss: 0.000
  Training epoch took: 0:00:48
  Labeled Num 0

Running Test...
auc: 0.08225045842472671
test Loss: 0.01145086404741616
NEW HIGHEST SCORE  0.08225045842472671

Training...


KeyboardInterrupt: 