In [1]:
import numpy as np
import pandas as pd
from skmultilearn.model_selection import IterativeStratification
from auxiliar_functions import process_folds, build_report, load_dataset, predict_deep, load_embedding
from IPython.display import clear_output
import pickle
import torch 
from torch.optim import AdamW
from torch.utils.data import TensorDataset, DataLoader
from torch.nn import BCEWithLogitsLoss
from torch.utils.tensorboard import SummaryWriter
from AsymmetricLoss import AsymmetricLossOptimized
from torch.utils.data import Dataset
from transformers import BertConfig, BertTokenizer, BertForPreTraining
from sklearn.metrics import precision_score, recall_score, accuracy_score
import gc

clear_output()

In [2]:
df = load_dataset("preprocess_dataset.npy")
Y_true_ = np.array([np.array(x) for x in df.frames])

df.head()

Unnamed: 0,original_text,preprocess_text,encoded,frames,conflicto,economico,humanidad,moral
0,Japón registró un nuevo déficit comercial réco...,japón registró un nuevo déficit comercial réco...,"[8759, 8914, 9989, 9898, 6584, 8773, 8428, 999...","[0, 1, 0, 0]",0,1,0,0
1,"UDI acusa ""mala memoria"" de la Nueva Mayoría f...",udi acusa mala memoria de la nueva mayoría fre...,"[9610, 8486, 8448, 7205, 10001, 9999, 9927, 97...","[1, 0, 0, 1]",1,0,0,1
2,La misteriosa oferta por Esteban Paredes que i...,la misteriosa oferta por esteban paredes que [...,"[9999, 1121, 8346, 9990, 8487, 8596, 9996, 1, ...","[1, 0, 0, 0]",1,0,0,0
3,La familia maratón que causó revuelo en Holand...,la familia maratón que causó revuelo en holand...,"[9999, 9668, 5417, 9996, 7388, 2016, 9997, 887...","[0, 0, 1, 0]",0,0,1,0
4,Crean sitio web que recopila mangas descontin...,crean sitio web que [UNK] [UNK] [UNK] para [UN...,"[2420, 9319, 9360, 9996, 1, 1, 1, 9985, 1, 998...","[0, 1, 0, 0]",0,1,0,0


In [3]:
ORDER = ['conflicto', 'economico', 'humanidad', 'moral']

        
class InputFeatures(object):
    """A single set of features of data."""
    def __init__(self, input_ids, input_mask, segment_ids):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids


def convert_example_to_feature(example_row):
    text, max_seq_length, tokenizer, cls_token, sep_token = example_row
    tokens_a = tokenizer.tokenize(text)
    
    # Account for [CLS] and [SEP] with "- 2"
    special_tokens_count = 2
    tokens_a = tokens_a[:(max_seq_length - special_tokens_count)]
    tokens = tokens_a + [sep_token]
    segment_ids = [0] * len(tokens)

    tokens = [cls_token] + tokens
    segment_ids = [0] + segment_ids

    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # The mask has 1 for real tokens and 0 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [1] * len(input_ids)

    # Zero-pad up to the sequence length.
    padding_length = max_seq_length - len(input_ids)
    
    input_ids = input_ids + ([0] * padding_length)
    input_mask = input_mask + ([0] * padding_length)
    segment_ids = segment_ids + ([0] * padding_length)

    assert len(input_ids) == max_seq_length
    assert len(input_mask) == max_seq_length
    assert len(segment_ids) == max_seq_length

    return InputFeatures(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
    )


def convert_examples_to_features(texts, max_seq_length, tokenizer, cls_token, sep_token):
    examples = [(text, max_seq_length, tokenizer, cls_token, sep_token) for text in texts]
    return [convert_example_to_feature(example) for example in examples]


def calculate_pres_recall(preds, Y):
    pres_class = [0] * Y.shape[1]
    recall_class = [0] * Y.shape[1]
    acc_class = [0] * Y.shape[1]

    all_y_pred = []
    all_y_true = []
    for i in range(Y.shape[1]):
        y_pred = [int(pred[i]) for pred in preds]
        y_true = [int(target[i]) for target in Y]

        all_y_pred.extend(y_pred)
        all_y_true.extend(y_true)

        pres_class[i] = precision_score(y_true, y_pred, zero_division=0)
        recall_class[i] = recall_score(y_true, y_pred, zero_division=0)
        acc_class[i] = accuracy_score(y_true, y_pred)

    mean_pres = precision_score(all_y_true, all_y_pred, zero_division=0)
    mean_recall = recall_score(all_y_true, all_y_pred, zero_division=0)
    mean_acc = accuracy_score(all_y_true, all_y_pred)

    return mean_pres, mean_recall, mean_acc, pres_class, recall_class, acc_class 


def save_data(writer, all_logits, Y, total_loss, loss_class,
              total_muestras, fold_index, epoch, step):

    if writer is None:
        return

    loss = (total_loss/total_muestras)
    pres, recall, acc, pres_class, recall_class, acc_class = calculate_pres_recall(all_logits, Y)

    writer.add_scalar(f'Fold_{fold_index}/loss_{step}', loss, epoch)
    writer.add_scalar(f'Fold_{fold_index}/recall_{step}', recall, epoch)
    writer.add_scalar(f'Fold_{fold_index}/presicion_{step}', pres, epoch)
    writer.add_scalar(f'Fold_{fold_index}/acc_{step}', acc, epoch)

    for i in range(len(loss_class)):
        loss_class_train = loss_class[i]/(total_muestras/Y.shape[1])
        writer.add_scalar(f'Fold_{fold_index}/loss_class_{ORDER[i]}_{step}', loss_class_train, epoch)
        writer.add_scalar(f'Fold_{fold_index}/presicion_{ORDER[i]}_{step}', pres_class[i], epoch)
        writer.add_scalar(f'Fold_{fold_index}/recall_{ORDER[i]}_{step}', recall_class[i], epoch)
        writer.add_scalar(f'Fold_{fold_index}/acc_{ORDER[i]}_{step}', acc_class[i], epoch)


In [4]:
class BetoEmbedding(torch.nn.Module):
    def __init__(self, hidden_size, batch_size=8, take_mean=True,
                 loss_function="cross-entropy", n_epochs=30):
        super().__init__()
        gc.collect()
        torch.cuda.empty_cache()

        self.tokenizer = BertTokenizer.from_pretrained('vocab.txt', keep_accents=True)
        config = BertConfig.from_json_file('bert_config.json')
        config.output_hidden_states = True
        self.bert = BertForPreTraining.from_pretrained('pytorch_model.bin', config=config)
        for name, param in self.bert.named_parameters():
            param.requires_grad = False
            if name.startswith("bert.encoder.layer.11"):
                param.requires_grad = True
        
        self.take_mean = take_mean
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.loss_object = AsymmetricLossOptimized()
        if loss_function == "cross-entropy":
            self.loss_object = torch.nn.BCEWithLogitsLoss()
        
        self.fc1 = torch.nn.Linear(768, hidden_size)
        self.relu = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(0.5)
        self.embedding_dropout = torch.nn.Dropout(0.5)
        self.fc2 = torch.nn.Linear(hidden_size, 4) # Cambiar 
        self.sigmoid = torch.nn.Sigmoid()
        
    def get_embedding(self, input_ids, attention_mask, token_type_ids):
        outputs = self.bert(input_ids, attention_mask, token_type_ids)
        # outputs[-1] = salidas de las capas ocultas (hidden_iniciales, capa_1, ..., capa_12)
        outputs = outputs[-1][-1] # Tomar de las hidden_state, la última capa
        if self.take_mean:
            output = torch.mean(outputs, 1)
        else:
            output = outputs[:, 0, :] # Tomar el primer vector (CLS)
        return output
    
    def forward(self, input_ids, attention_mask, token_type_ids):
        output = self.get_embedding(input_ids, attention_mask, token_type_ids)
        output = self.fc1(output)
        output = self.relu(output)
        output = self.dropout(output)
        output = self.fc2(output)
        return output
  

    def get_proba(self, input_ids, attention_mask, token_type_ids):
        return self.sigmoid(self(input_ids, attention_mask, token_type_ids))
        
        
    def fit(self, X, Y, X_val=None, Y_val=None, writer=None, fold_index=None):
        model = self.cuda()
        self.bert.cuda()
        tokenizer = self.tokenizer
        
        features = convert_examples_to_features(X, 512, tokenizer, tokenizer.cls_token,tokenizer.sep_token)

        all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)

        train_dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, torch.Tensor(Y))
        train_dl = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        
        dataloaders = {"train": train_dl}
        
        if X_val is not None:
            features = convert_examples_to_features(X_val, 512, tokenizer, tokenizer.cls_token,tokenizer.sep_token)
            all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
            all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
            all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
            val_dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, torch.Tensor(Y_val))
            dataloaders["val"] = DataLoader(val_dataset, batch_size=self.batch_size)
        
        optimizer = AdamW(model.parameters(), lr=0.001)  
        
        for epoch in range(self.n_epochs):
            for step in dataloaders:
                if step == "train":
                    model.train()
                else:
                    model.eval()
                    
                total_muestras = 0.0
                total_correctas = 0.0
                total_loss = 0
                loss_class = [0, 0, 0, 0]
                all_logits = None
                all_target = None

                for batch in dataloaders[step]:
                    optimizer.zero_grad()                    
                    
                    input_ids, attention_mask, token_type_ids, target = [x.cuda() for x in batch]
                    target = target.cuda()                   
                    logits = model(input_ids, attention_mask, token_type_ids)          

                    loss = self.loss_object(logits, target)
                    total_loss += (loss * (target.shape[0] * target.shape[1])).item()

                    for i in range(target.shape[1]):
                        loss_c = self.loss_object(logits[:, i], target[:, i])
                        loss_class[i] += (loss_c * target.shape[0]).item()

                    preds = logits >= 0.0
                    if all_logits is None:
                        all_logits = preds.detach().cpu().numpy()
                        all_target = target.detach().cpu().numpy()
                    else:
                        all_logits = np.append(all_logits, preds.detach().cpu().numpy(), axis=0)
                        all_target = np.append(all_target, target.detach().cpu().numpy(), axis=0)

                    total_muestras += (target.shape[0]*target.shape[1])       # Sumamos el tamaño del batch
                    
                    if step == "train":
                        loss.backward()                             # Backpropagation
                        optimizer.step()                            # Actualizamos parámetros
                        correctas = (preds == target).sum().item()  # Acumulamos las correctas durante la época
                        total_correctas += correctas               
                        accuracy = total_correctas/total_muestras 
                        
                        print("\rEpoca {}: Loss: {:.4f} Accuracy: {:.2f}%".format(epoch, loss, 100*accuracy),
                              end="")

                save_data(writer, all_logits, all_target, total_loss, loss_class,
                          total_muestras, fold_index, epoch, step)

                    
    def predict_proba(self, X):
        model = self.cuda()
        self.bert.cuda()
        model.eval()
        self.bert.eval()
        
        tokenizer = self.tokenizer
        features = convert_examples_to_features(X, 512, tokenizer, tokenizer.cls_token,tokenizer.sep_token)

        all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)

        train_dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids)
        train_dl = DataLoader(train_dataset, batch_size=1)
        
        all_probs = None
        for batch in train_dl:
            input_ids, attention_mask, token_type_ids = [x.cuda() for x in batch]
            probs = model.get_proba(input_ids, attention_mask, token_type_ids)

            if all_probs is None:
                all_probs = probs.detach().cpu().numpy()
            else:
                all_probs = np.append(all_probs, probs.detach().cpu().numpy(), axis=0)

        return all_probs 

In [5]:
def cross_validation(args, name):
    writer = SummaryWriter(f'runs/{name}')
    
    embedding_train = df.preprocess_text.values
    Y_true = Y_true_
    
    np.random.seed(4444)
    torch.manual_seed(4444)

    datasets = []
    k_fold = IterativeStratification(n_splits=10, order=1)

    for fold_index, (train, test) in enumerate(k_fold.split(embedding_train, Y_true)):

        Y_train_fold, Y_test_fold = Y_true[train], Y_true[test]
        X_train_fold, X_test_fold = embedding_train[train], embedding_train[test]

        model = BetoEmbedding(**args)
        model.fit(X_train_fold, Y_train_fold, X_test_fold, Y_test_fold, writer, fold_index)
        torch.save(model.state_dict(), f'Models/Fold_{fold_index+1}_{name}.model')
        
        frames_probability = model.predict_proba(X_test_fold).tolist()
        y_pred = [[int(pred >= 0.5) for pred in frames] for frames in frames_probability]

        df_result = pd.DataFrame()
        df_result["y_pred"] = y_pred
        df_result["y_prob"] = frames_probability
        df_result["y_true"] = Y_test_fold.tolist()
        print(f'Folds {fold_index + 1}')
        data = process_folds([df_result])
        build_report(pd.DataFrame(data.mean()).T, data.applymap(lambda x:0), "Beto_Finetunning")

        datasets.append(df_result.copy())
        del model.bert
        del model
        gc.collect()
        torch.cuda.empty_cache()
    
    if writer:
        writer.close()

    print("Finals results")
    data = process_folds(datasets)
    build_report(pd.DataFrame(data.mean()).T, pd.DataFrame(data.std()).T, "Beto_Finetunning")

    with open(f"Results/cross_validation_{name}.pickle", "wb") as file:
        pickle.dump(datasets, file)


In [11]:
args = {'hidden_size': 150, "batch_size": 32, 'take_mean': True, 'n_epochs': 4, 'loss_function': "cross-entropy"}
    
cross_validation(args, 'Beto-finetunning_cross_entropy')

Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.3484 Accuracy: 81.21%Folds 1

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.79(±0.00)       0.79(±0.00)       0.78(±0.00)
               Macro       0.75(±0.00)       0.71(±0.00)       0.72(±0.00)       0.66(±0.00)       0.82(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.4154 Accuracy: 82.00%Folds 2

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.80(±0.00)       0.79(±0.00)       0.79(±0.00)
               Macro       0.76(±0.00)       0.73(±0.00)       0.74(±0.00)       0.69(±0.00)       0.84(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.4900 Accuracy: 81.77%Folds 3

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.80(±0.00)       0.80(±0.00)       0.80(±0.00)
               Macro       0.77(±0.00)       0.73(±0.00)       0.74(±0.00)       0.70(±0.00)       0.84(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.3573 Accuracy: 81.99%Folds 4

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.77(±0.00)       0.77(±0.00)       0.76(±0.00)
               Macro       0.73(±0.00)       0.68(±0.00)       0.68(±0.00)       0.65(±0.00)       0.82(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.3444 Accuracy: 81.91%Folds 5

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.79(±0.00)       0.79(±0.00)       0.79(±0.00)
               Macro       0.74(±0.00)       0.74(±0.00)       0.74(±0.00)       0.65(±0.00)       0.82(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.3604 Accuracy: 81.68%Folds 6

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.79(±0.00)       0.79(±0.00)       0.79(±0.00)
               Macro       0.75(±0.00)       0.72(±0.00)       0.73(±0.00)       0.68(±0.00)       0.83(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.4315 Accuracy: 81.74%Folds 7

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.78(±0.00)       0.78(±0.00)       0.78(±0.00)
               Macro       0.74(±0.00)       0.73(±0.00)       0.73(±0.00)       0.68(±0.00)       0.83(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.3574 Accuracy: 82.01%Folds 8

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.80(±0.00)       0.81(±0.00)       0.80(±0.00)
               Macro       0.76(±0.00)       0.73(±0.00)       0.75(±0.00)       0.72(±0.00)       0.85(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.3445 Accuracy: 82.00%Folds 9

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.80(±0.00)       0.80(±0.00)       0.80(±0.00)
               Macro       0.75(±0.00)       0.74(±0.00)       0.74(±0.00)       0.67(±0.00)       0.82(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 3: Loss: 0.4014 Accuracy: 82.14%Folds 10

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.79(±0.00)       0.79(±0.00)       0.79(±0.00)
               Macro       0.76(±0.00)       0.73(±0.00)       0.73(±0.00)       0.69(±0.00)       0.82(±0.00)
        
Finals results

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.79(±0.02)       0.79(±0.02)       0.79(±0.02)
               Macro       0.75(±0.03)       0.72(±0.04)       0.73(±0.04)       0.68(±0.05)       0.83(±0.02)
        


In [6]:
args = {'hidden_size': 150, "batch_size": 32, 'take_mean': True, 'n_epochs': 10, 'loss_function': "asymetric"}
    
cross_validation(args, 'Beto-finetunning_asymetric')

Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 1.9305 Accuracy: 77.01%%Folds 1

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.78(±0.00)       0.70(±0.00)       0.69(±0.00)
               Macro       0.69(±0.00)       0.71(±0.00)       0.65(±0.00)       0.65(±0.00)       0.79(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 8.6965 Accuracy: 77.01%%Folds 2

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.81(±0.00)       0.71(±0.00)       0.69(±0.00)
               Macro       0.73(±0.00)       0.73(±0.00)       0.66(±0.00)       0.69(±0.00)       0.83(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 9.3006 Accuracy: 76.57%%Folds 3

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.82(±0.00)       0.72(±0.00)       0.71(±0.00)
               Macro       0.73(±0.00)       0.73(±0.00)       0.67(±0.00)       0.67(±0.00)       0.82(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 7.6648 Accuracy: 76.91%%Folds 4

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.80(±0.00)       0.73(±0.00)       0.72(±0.00)
               Macro       0.71(±0.00)       0.74(±0.00)       0.68(±0.00)       0.62(±0.00)       0.81(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 8.2436 Accuracy: 77.31%%Folds 5

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.78(±0.00)       0.70(±0.00)       0.68(±0.00)
               Macro       0.69(±0.00)       0.72(±0.00)       0.64(±0.00)       0.68(±0.00)       0.82(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 7.5762 Accuracy: 76.47%%Folds 6

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.81(±0.00)       0.74(±0.00)       0.73(±0.00)
               Macro       0.74(±0.00)       0.75(±0.00)       0.70(±0.00)       0.67(±0.00)       0.82(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 6.5144 Accuracy: 75.33%%Folds 7

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.80(±0.00)       0.72(±0.00)       0.70(±0.00)
               Macro       0.72(±0.00)       0.73(±0.00)       0.66(±0.00)       0.68(±0.00)       0.81(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 4.2287 Accuracy: 74.41%%Folds 8

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.77(±0.00)       0.68(±0.00)       0.65(±0.00)
               Macro       0.68(±0.00)       0.71(±0.00)       0.62(±0.00)       0.71(±0.00)       0.83(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 10.2238 Accuracy: 74.47%Folds 9

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.80(±0.00)       0.72(±0.00)       0.71(±0.00)
               Macro       0.72(±0.00)       0.73(±0.00)       0.68(±0.00)       0.67(±0.00)       0.82(±0.00)
        


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated
Some weights of BertForPreTraining were not initialized from the model checkpoint at pytorch_model.bin and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoca 9: Loss: 6.5152 Accuracy: 75.55%%Folds 10

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.81(±0.00)       0.72(±0.00)       0.71(±0.00)
               Macro       0.73(±0.00)       0.74(±0.00)       0.68(±0.00)       0.67(±0.00)       0.82(±0.00)
        
Finals results

        Mean                
                            precision           recall         f1-score              AUC          ROC AUC
        
               Micro       0.80(±0.03)       0.71(±0.04)       0.70(±0.04)
               Macro       0.71(±0.04)       0.73(±0.03)       0.66(±0.04)       0.67(±0.05)       0.82(±0.02)
        
