In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# The dataset CSVs read later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 28.0 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.0-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 74.6 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 61.1 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.0 tokenizers-0.13.2 transformers-4.24.0


In [None]:
from transformers import AutoTokenizer, AutoModel
import random, math
from typing import Union
import numpy as np
import torch
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
import transformers
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

def load_datasets(
    train_path: str = r'/content/drive/MyDrive/dreaddit/dreaddit-train.csv',
    test_path: str = r'/content/drive/MyDrive/dreaddit/dreaddit-test.csv',
) -> "tuple[dict, dict]":
    """Load the train and test CSV files into plain dictionaries.

    Each CSV must contain a ``text`` column and a ``label`` column.

    Args:
        train_path: Path of the training CSV. Defaults to the Drive location
            used by this notebook.
        test_path: Path of the test CSV. Defaults to the Drive location used
            by this notebook.

    Returns:
        A ``(train, test)`` tuple. Each element is a dict with two parallel
        lists: ``"texts"`` (values of the ``text`` column) and ``"label"``
        (values of the ``label`` column).
    """
    # Imported locally so the function does not rely on a module-level `pd`
    # having been created by some other cell/block before it is called.
    import pandas as pd

    def _read_split(csv_path):
        frame = pd.read_csv(csv_path)
        return {
            "texts": frame["text"].tolist(),
            "label": frame["label"].tolist(),
        }

    return _read_split(train_path), _read_split(test_path)

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over a dict holding parallel "texts" and "label" lists."""

    def __init__(self, data_dict: dict):
        # Only a reference is stored; the underlying lists are not copied.
        self.data_dict = data_dict

    def __len__(self):
        # The number of samples is defined by the "texts" list.
        texts = self.data_dict["texts"]
        return len(texts)

    def __getitem__(self, idx):
        # Return the (text, label) pair found at position `idx`.
        text = self.data_dict["texts"][idx]
        label = self.data_dict["label"][idx]
        return text, label


def train_BiLSTM(model, loader, device, optimizer=None):
    """Train ``model`` for one pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module exposing ``get_criterion()``, ``embedding(texts)`` and a
            forward pass that maps the embeddings to one logit per sample.
            The criterion is fed raw logits (the model in this file returns
            ``BCEWithLogitsLoss``, which applies the sigmoid itself).
        loader: Iterable of ``(texts, labels)`` batches that also supports
            ``len()``.
        device: Device the embeddings and labels are moved to.
        optimizer: Optimizer to step. When omitted, the module-level variable
            named ``optimizer`` is used, which is what existing callers rely on.

    Returns:
        Sum of the per-batch criterion values divided by the number of batches.

    Raises:
        NameError: If ``optimizer`` is omitted and no module-level
            ``optimizer`` exists.
    """
    if optimizer is None:
        # Backward compatibility with callers that set a global optimizer.
        try:
            optimizer = globals()["optimizer"]
        except KeyError:
            raise NameError("name 'optimizer' is not defined") from None

    model.train()
    criterion = model.get_criterion()
    total_loss = 0.0

    for X, y in tqdm(loader):
        optimizer.zero_grad()

        inputs = model.embedding(X).to(device)
        target = y.to(device, dtype=torch.float32)

        # Flatten to 1-D instead of a bare squeeze() so that a batch holding a
        # single sample keeps shape (1,) and still matches `target`.
        logits = model(inputs).reshape(-1)

        # Do NOT apply a sigmoid here: the criterion expects raw logits, and
        # squashing them first would apply the sigmoid twice, bounding the
        # loss away from zero and flattening the gradients.
        loss = criterion(logits, target)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(loader)

@torch.no_grad()
def eval_mentalbert(model, loader, device):
    model.eval()

    targets = []
    preds = []
    for X,y in tqdm(loader):
        #print(len(X))
        optimizer.zero_grad()

        inputs = model.embedding(X).to(device)
        target = y.to(device, dtype=torch.float32)

        pred = model(inputs).squeeze()
        pred = torch.sigmoid(pred)

        preds.append(pred)
        targets.append(target)
    #print(preds)

    return torch.cat(preds), torch.cat(targets)

class BiLSTM(nn.Module):
    """Bidirectional LSTM binary classifier on top of MentalBERT token embeddings.

    The pretrained encoder is used purely as a feature extractor (it is always
    run in eval mode under ``torch.no_grad()``); only the LSTM and the two
    linear layers are trained. ``forward`` returns one raw logit per sample,
    matching the ``BCEWithLogitsLoss`` criterion exposed by ``get_criterion``.
    """

    def __init__(self):
        super(BiLSTM, self).__init__()

        self.tokenizer = AutoTokenizer.from_pretrained("mental/mental-bert-base-uncased")
        self.pretrained_model = AutoModel.from_pretrained("mental/mental-bert-base-uncased")
        # The encoder is never updated (see `embedding`), so mark it frozen.
        # This makes the `requires_grad` filter in `assign_optimizer` effective.
        for param in self.pretrained_model.parameters():
            param.requires_grad = False

        self.hidden_size = 64
        drp = 0.1
        # 768 is the input feature size fed to the LSTM (encoder hidden states).
        self.lstm = nn.LSTM(768, self.hidden_size, bidirectional=True, batch_first=True)
        # Input is hidden_size*4: (2 directions) x (mean pool + max pool).
        self.linear = nn.Linear(self.hidden_size * 4, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drp)
        self.out = nn.Linear(64, 1)

        self.criterion = torch.nn.BCEWithLogitsLoss(reduction='sum')

    def get_pretrained_model(self):
        """Return the pretrained encoder module."""
        return self.pretrained_model

    def get_tokenizer(self):
        """Return the tokenizer paired with the pretrained encoder."""
        return self.tokenizer

    def get_criterion(self):
        """Return the loss module (expects raw logits, sums over the batch)."""
        return self.criterion

    def assign_optimizer(self, lr=0.001):
        """Create an Adam optimizer over this module's trainable parameters.

        Args:
            lr: Learning rate passed to Adam.

        Returns:
            The newly created ``torch.optim.Adam`` instance.
        """
        # Use this instance's parameters (not a global `model`) and honour `lr`.
        trainable = [p for p in self.parameters() if p.requires_grad]
        return torch.optim.Adam(trainable, lr=lr)

    def tokenize(
        self,
        texts: "list[str]",
        max_length: int = 100,
        truncation: bool = True,
        padding: bool = True,
    ):
        """Tokenize a batch of texts into PyTorch tensors.

        Args:
            texts: Iterable of strings; it is materialised into a list.
            max_length: Maximum sequence length passed to the tokenizer.
            truncation: Forwarded to the tokenizer's ``truncation`` option.
            padding: Forwarded to the tokenizer's ``padding`` option.

        Returns:
            Whatever the tokenizer returns when called with
            ``return_tensors="pt"``.
        """
        tokenizer = self.get_tokenizer()
        return tokenizer(
            list(texts),
            max_length=max_length,
            truncation=truncation,
            padding=padding,
            return_tensors="pt",
        )

    def embedding(self, inputs):
        """Encode raw texts into per-token hidden states with the frozen encoder.

        Args:
            inputs: Iterable of strings (one batch of texts).

        Returns:
            The first element of the encoder output, computed without
            gradients, located on the device the encoder's parameters live on.
        """
        Model = self.get_pretrained_model()
        # Run on the encoder's own device rather than a global `device`.
        model_device = next(Model.parameters()).device
        encoded_input = self.tokenize(inputs).to(model_device)

        # The outer model may be in train mode; the encoder must not use dropout.
        Model.eval()
        with torch.no_grad():
            model_output = Model(**encoded_input)
            hid_states = model_output[0]
        return hid_states

    def forward(self, x):
        """Map token embeddings to one logit per sample.

        Args:
            x: Batch-first tensor of token embeddings with feature size 768.

        Returns:
            Tensor with a trailing dimension of size 1 holding raw logits
            (no sigmoid applied).
        """
        h_lstm, _ = self.lstm(x)
        # NOTE(review): pooling runs over every position along dim 1, padded
        # positions included, because no attention mask reaches this method.
        avg_pool = torch.mean(h_lstm, 1)
        max_pool, _ = torch.max(h_lstm, 1)
        conc = torch.cat((avg_pool, max_pool), 1)
        conc = self.relu(self.linear(conc))
        conc = self.dropout(conc)
        out = self.out(conc)
        return out
    
    
def _classification_metrics(targets, preds):
    """Return ``(accuracy, precision, recall, f1)`` for hard binary predictions.

    Both arguments are torch tensors; each is moved to the CPU once here
    before being handed to the scikit-learn metric functions.
    """
    targets_cpu = targets.cpu()
    preds_cpu = preds.cpu()
    return (
        accuracy_score(targets_cpu, preds_cpu),
        precision_score(targets_cpu, preds_cpu),
        recall_score(targets_cpu, preds_cpu),
        f1_score(targets_cpu, preds_cpu),
    )


def _mean(values):
    """Arithmetic mean of a non-empty sequence of numbers."""
    return sum(values) / len(values)


if __name__ == "__main__":
    # Kept at module level: code elsewhere in this file may expect a global `pd`.
    import pandas as pd

    # Seed every RNG in use so runs are repeatable.
    random.seed(2022)
    np.random.seed(2022)
    torch.manual_seed(2022)

    sample_size = None  # set to an int to truncate both splits for quick runs
    batch_size = 128
    n_epochs = 10
    num_words = 50000  # not used by the code below

    # If you use GPUs, use the code below:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_raw, test_raw = load_datasets()

    if sample_size is not None:
        for key in ["texts", "label"]:
            train_raw[key] = train_raw[key][:sample_size]
            test_raw[key] = test_raw[key][:sample_size]

    print("=" * 80)
    print("Running test code for part 1")
    print("-" * 80)

    # The held-out test set is the same for every fold.
    test_loader = torch.utils.data.DataLoader(
        Dataset(test_raw), batch_size=batch_size, shuffle=False
    )

    X = np.array(train_raw['texts'])
    y = np.array(train_raw['label'])

    k = 10
    kf = KFold(n_splits=k, random_state=None)

    # One entry per fold.
    acc_train, pr_train, re_train, f1_train = [], [], [], []
    acc_valid, pr_valid, re_valid, f1_valid = [], [], [], []
    acc_test, pr_test, re_test, f1_test = [], [], [], []

    for f, (train_index, valid_index) in enumerate(kf.split(X), start=1):

        print('Fold', f, '>>>'*10)

        train = {
            'texts': X[train_index].tolist(),
            'label': y[train_index].tolist(),
        }
        valid = {
            'texts': X[valid_index].tolist(),
            'label': y[valid_index].tolist(),
        }

        train_loader = torch.utils.data.DataLoader(
            Dataset(train), batch_size=batch_size, shuffle=True
        )
        valid_loader = torch.utils.data.DataLoader(
            Dataset(valid), batch_size=batch_size, shuffle=False
        )

        # A fresh model and optimizer per fold. `model` and `optimizer` stay
        # module-level names because the train/eval helpers may read them.
        model = BiLSTM().to(device)
        optimizer = model.assign_optimizer(lr=1e-4)

        # One entry per epoch of the current fold.
        train_accuracies, train_precisions, train_recalls, train_f1_scores = [], [], [], []
        valid_accuracies, valid_precisions, valid_recalls, valid_f1_scores = [], [], [], []

        for epoch in range(n_epochs):

            loss = train_BiLSTM(model, train_loader, device=device)

            train_preds, train_targets = eval_mentalbert(model, train_loader, device=device)
            train_preds = train_preds.round()
            train_accuracy, train_precision, train_recall, train_f1 = _classification_metrics(
                train_targets, train_preds
            )
            train_accuracies.append(train_accuracy)
            train_precisions.append(train_precision)
            train_recalls.append(train_recall)
            train_f1_scores.append(train_f1)

            eval_preds, eval_targets = eval_mentalbert(model, valid_loader, device=device)
            eval_preds = eval_preds.round()
            eval_accuracy, valid_precision, valid_recall, valid_f1_score = _classification_metrics(
                eval_targets, eval_preds
            )
            valid_accuracies.append(eval_accuracy)
            valid_precisions.append(valid_precision)
            valid_recalls.append(valid_recall)
            valid_f1_scores.append(valid_f1_score)

            print("Epoch:", epoch)
            print("Training loss:", loss)
            print("Train Accuracy:", train_accuracy)
            print("Validation Accuracy:", eval_accuracy)

            print('---'*10)

        # Per-fold summary: each metric is averaged over ALL epochs of the
        # fold (not just the last one).
        acc_train.append(_mean(train_accuracies))
        pr_train.append(_mean(train_precisions))
        re_train.append(_mean(train_recalls))
        f1_train.append(_mean(train_f1_scores))

        acc_valid.append(_mean(valid_accuracies))
        pr_valid.append(_mean(valid_precisions))
        re_valid.append(_mean(valid_recalls))
        f1_valid.append(_mean(valid_f1_scores))

        print('---'*10)

        # Evaluate the fold's final model on the held-out test set.
        test_preds, test_targets = eval_mentalbert(model, test_loader, device=device)
        test_preds = test_preds.round()
        test_accuracy, test_precision, test_recall, test_f1_score = _classification_metrics(
            test_targets, test_preds
        )

        print("TEST Accuracy:", test_accuracy)
        print("TEST Precision:", test_precision)
        print("TEST Recall:", test_recall)
        print("TEST F1_Score:", test_f1_score)

        acc_test.append(test_accuracy)
        pr_test.append(test_precision)
        re_test.append(test_recall)
        f1_test.append(test_f1_score)

        print('...'*10)

    # Averages across folds.
    print("Total Training Accuracy:", _mean(acc_train))
    print("Total Training Precision:", _mean(pr_train))
    print("Total Training Recall:", _mean(re_train))
    print("Total Training F1_Score:", _mean(f1_train))

    print('>>><<<'*10)

    print("Total Validation Accuracy:", _mean(acc_valid))
    print("Total Validation Precision:", _mean(pr_valid))
    print("Total Validation Recall:", _mean(re_valid))
    print("Total Validation F1_Score:", _mean(f1_valid))

    print('>>><<<'*10)

    print("Total Test Accuracy:", _mean(acc_test))
    print("Total Test Precision:", _mean(pr_test))
    print("Total Test Recall:", _mean(re_test))
    print("Total Test F1_Score:", _mean(f1_test))

    print('>>><<<'*10)



Running test code for part 1
--------------------------------------------------------------------------------
Fold 1 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Downloading:   0%|          | 0.00/321 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/639 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 88.72323188781738
Train Accuracy: 0.4780736100234926
Validation Accuracy: 0.45422535211267606
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 83.03682594299316
Train Accuracy: 0.7243539545810493
Validation Accuracy: 0.6901408450704225
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 77.15162162780761
Train Accuracy: 0.79796397807361
Validation Accuracy: 0.7816901408450704
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.42431526184082
Train Accuracy: 0.8132341425215348
Validation Accuracy: 0.7640845070422535
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 73.76379470825195
Train Accuracy: 0.8163664839467502
Validation Accuracy: 0.75
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 73.42352752685547
Train Accuracy: 0.8128425998433829
Validation Accuracy: 0.7394366197183099
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 73.19204368591309
Train Accuracy: 0.8610023492560689
Validation Accuracy: 0.7922535211267606
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 72.271480178833
Train Accuracy: 0.8555207517619421
Validation Accuracy: 0.7887323943661971
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 71.93695259094238
Train Accuracy: 0.8762725137039937
Validation Accuracy: 0.7852112676056338
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 70.96583671569825
Train Accuracy: 0.8688332028191073
Validation Accuracy: 0.7676056338028169
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.7636363636363637
TEST Precision: 0.8164556962025317
TEST Recall: 0.6991869918699187
TEST F1_Score: 0.7532846715328467
..............................
Fold 2 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 87.45772552490234
Train Accuracy: 0.6331245105716523
Validation Accuracy: 0.6091549295774648
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 77.77762260437012
Train Accuracy: 0.8010963194988254
Validation Accuracy: 0.7323943661971831
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 74.51400947570801
Train Accuracy: 0.8261550509005482
Validation Accuracy: 0.7711267605633803
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.37237434387207
Train Accuracy: 0.8206734534064213
Validation Accuracy: 0.7605633802816901
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 73.48649635314942
Train Accuracy: 0.8312451057165231
Validation Accuracy: 0.7535211267605634
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 72.8851146697998
Train Accuracy: 0.8582615505090054
Validation Accuracy: 0.7535211267605634
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 72.11754493713379
Train Accuracy: 0.8637431480031323
Validation Accuracy: 0.7746478873239436
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 72.56792106628419
Train Accuracy: 0.8610023492560689
Validation Accuracy: 0.7816901408450704
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 71.65815010070801
Train Accuracy: 0.8774471417384495
Validation Accuracy: 0.778169014084507
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 70.5803066253662
Train Accuracy: 0.8711824588880188
Validation Accuracy: 0.7676056338028169
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.7706293706293706
TEST Precision: 0.8253968253968254
TEST Recall: 0.7046070460704607
TEST F1_Score: 0.7602339181286548
..............................
Fold 3 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 87.64731254577637
Train Accuracy: 0.5736100234925607
Validation Accuracy: 0.5950704225352113
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 78.68495635986328
Train Accuracy: 0.79796397807361
Validation Accuracy: 0.7992957746478874
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 75.20587997436523
Train Accuracy: 0.8241973375097885
Validation Accuracy: 0.8098591549295775
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.00125694274902
Train Accuracy: 0.8300704776820673
Validation Accuracy: 0.823943661971831
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 73.63512153625489
Train Accuracy: 0.8347689898198903
Validation Accuracy: 0.795774647887324
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 72.55551986694336
Train Accuracy: 0.855912294440094
Validation Accuracy: 0.8204225352112676
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 71.7719882965088
Train Accuracy: 0.874706342991386
Validation Accuracy: 0.8133802816901409
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 70.83418769836426
Train Accuracy: 0.8888018794048551
Validation Accuracy: 0.8133802816901409
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 70.43147773742676
Train Accuracy: 0.8981989036805011
Validation Accuracy: 0.8063380281690141
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 69.9912010192871
Train Accuracy: 0.8711824588880188
Validation Accuracy: 0.8169014084507042
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.7776223776223776
TEST Precision: 0.847682119205298
TEST Recall: 0.6937669376693767
TEST F1_Score: 0.7630402384500745
..............................
Fold 4 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 88.13707656860352
Train Accuracy: 0.46906812842599843
Validation Accuracy: 0.5352112676056338
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 80.26832122802735
Train Accuracy: 0.8042286609240408
Validation Accuracy: 0.7816901408450704
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 75.2918773651123
Train Accuracy: 0.8191072826938136
Validation Accuracy: 0.7640845070422535
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.12598114013672
Train Accuracy: 0.8398590446358654
Validation Accuracy: 0.8063380281690141
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 72.98599433898926
Train Accuracy: 0.8543461237274863
Validation Accuracy: 0.8063380281690141
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 72.4199592590332
Train Accuracy: 0.8555207517619421
Validation Accuracy: 0.7887323943661971
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 72.26460838317871
Train Accuracy: 0.8441660140955364
Validation Accuracy: 0.778169014084507
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 71.69615936279297
Train Accuracy: 0.874706342991386
Validation Accuracy: 0.7992957746478874
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 70.63554344177246
Train Accuracy: 0.8782302270947533
Validation Accuracy: 0.7746478873239436
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 69.99806632995606
Train Accuracy: 0.8907595927956147
Validation Accuracy: 0.795774647887324
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.779020979020979
TEST Precision: 0.8040345821325648
TEST Recall: 0.7560975609756098
TEST F1_Score: 0.7793296089385475
..............................
Fold 5 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 88.07087669372558
Train Accuracy: 0.48198903680501176
Validation Accuracy: 0.4471830985915493
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 80.10132331848145
Train Accuracy: 0.7963978073610023
Validation Accuracy: 0.7922535211267606
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 75.94407691955567
Train Accuracy: 0.7411902897415819
Validation Accuracy: 0.7288732394366197
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.70459823608398
Train Accuracy: 0.8328112764291308
Validation Accuracy: 0.7922535211267606
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 73.7177890777588
Train Accuracy: 0.8308535630383712
Validation Accuracy: 0.7816901408450704
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 72.94974021911621
Train Accuracy: 0.845732184808144
Validation Accuracy: 0.795774647887324
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 72.9248233795166
Train Accuracy: 0.8382928739232577
Validation Accuracy: 0.7887323943661971
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 72.04969749450683
Train Accuracy: 0.8739232576350823
Validation Accuracy: 0.795774647887324
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 71.10737571716308
Train Accuracy: 0.8809710258418167
Validation Accuracy: 0.8169014084507042
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 70.43343048095703
Train Accuracy: 0.894283476898982
Validation Accuracy: 0.7992957746478874
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.7804195804195804
TEST Precision: 0.8154761904761905
TEST Recall: 0.7425474254742548
TEST F1_Score: 0.7773049645390071
..............................
Fold 6 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 88.73300285339356
Train Accuracy: 0.4780736100234926
Validation Accuracy: 0.45422535211267606
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 82.21779289245606
Train Accuracy: 0.7987470634299139
Validation Accuracy: 0.7464788732394366
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 76.31054153442383
Train Accuracy: 0.8007047768206734
Validation Accuracy: 0.7464788732394366
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.79053115844727
Train Accuracy: 0.8202819107282694
Validation Accuracy: 0.7640845070422535
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 74.35271873474122
Train Accuracy: 0.8355520751761942
Validation Accuracy: 0.778169014084507
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 73.36258888244629
Train Accuracy: 0.8527799530148786
Validation Accuracy: 0.7816901408450704
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 72.78773727416993
Train Accuracy: 0.8657008613938919
Validation Accuracy: 0.795774647887324
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 71.85545692443847
Train Accuracy: 0.879796397807361
Validation Accuracy: 0.778169014084507
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 71.19612197875976
Train Accuracy: 0.880187940485513
Validation Accuracy: 0.778169014084507
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 70.25005264282227
Train Accuracy: 0.884494909945184
Validation Accuracy: 0.778169014084507
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.786013986013986
TEST Precision: 0.8417721518987342
TEST Recall: 0.7208672086720868
TEST F1_Score: 0.7766423357664234
..............................
Fold 7 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 87.92465629577637
Train Accuracy: 0.495301487862177
Validation Accuracy: 0.5176056338028169
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 79.072998046875
Train Accuracy: 0.8003132341425215
Validation Accuracy: 0.795774647887324
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 75.25522499084472
Train Accuracy: 0.7928739232576351
Validation Accuracy: 0.778169014084507
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.31457595825195
Train Accuracy: 0.8328112764291308
Validation Accuracy: 0.8028169014084507
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 73.93937873840332
Train Accuracy: 0.8245888801879405
Validation Accuracy: 0.8133802816901409
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 72.68562698364258
Train Accuracy: 0.8519968676585747
Validation Accuracy: 0.8028169014084507
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 72.10256919860839
Train Accuracy: 0.8347689898198903
Validation Accuracy: 0.795774647887324
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 71.16599349975586
Train Accuracy: 0.8684416601409554
Validation Accuracy: 0.8133802816901409
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 70.82309188842774
Train Accuracy: 0.879796397807361
Validation Accuracy: 0.823943661971831
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 70.31904106140136
Train Accuracy: 0.8978073610023493
Validation Accuracy: 0.8133802816901409
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.7706293706293706
TEST Precision: 0.7762803234501348
TEST Recall: 0.7804878048780488
TEST F1_Score: 0.7783783783783784
..............................
Fold 8 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 87.17567176818848
Train Accuracy: 0.6554424432263116
Validation Accuracy: 0.6408450704225352
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 77.88005332946777
Train Accuracy: 0.7819107282693813
Validation Accuracy: 0.7711267605633803
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 75.35855484008789
Train Accuracy: 0.7987470634299139
Validation Accuracy: 0.7711267605633803
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.53083076477051
Train Accuracy: 0.8116679718089271
Validation Accuracy: 0.7922535211267606
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 74.24584045410157
Train Accuracy: 0.841033672670321
Validation Accuracy: 0.8133802816901409
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 72.84631843566895
Train Accuracy: 0.83672670321065
Validation Accuracy: 0.8063380281690141
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 72.16386108398437
Train Accuracy: 0.8637431480031323
Validation Accuracy: 0.8204225352112676
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 71.26387901306153
Train Accuracy: 0.8535630383711824
Validation Accuracy: 0.8204225352112676
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 71.57121925354004
Train Accuracy: 0.8778386844166014
Validation Accuracy: 0.8133802816901409
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 70.83132400512696
Train Accuracy: 0.8868441660140955
Validation Accuracy: 0.8169014084507042
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.793006993006993
TEST Precision: 0.7994579945799458
TEST Recall: 0.7994579945799458
TEST F1_Score: 0.7994579945799458
..............................
Fold 9 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 88.76411590576171
Train Accuracy: 0.4794520547945205
Validation Accuracy: 0.4416961130742049
------------------------------


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 82.44587669372558
Train Accuracy: 0.7185909980430528
Validation Accuracy: 0.7385159010600707
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 77.1852897644043
Train Accuracy: 0.7933463796477495
Validation Accuracy: 0.7915194346289752
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.86581306457519
Train Accuracy: 0.8101761252446184
Validation Accuracy: 0.7950530035335689
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 74.00882263183594
Train Accuracy: 0.8454011741682974
Validation Accuracy: 0.8127208480565371
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 73.3751651763916
Train Accuracy: 0.8536203522504893
Validation Accuracy: 0.7915194346289752
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 72.44253120422363
Train Accuracy: 0.8544031311154598
Validation Accuracy: 0.7915194346289752
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 72.48443756103515
Train Accuracy: 0.812133072407045
Validation Accuracy: 0.7632508833922261
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 72.09861640930175
Train Accuracy: 0.8700587084148728
Validation Accuracy: 0.7915194346289752
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 71.03929595947265
Train Accuracy: 0.8892367906066536
Validation Accuracy: 0.8021201413427562
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.7902097902097902
TEST Precision: 0.8101983002832861
TEST Recall: 0.7750677506775068
TEST F1_Score: 0.7922437673130195
..............................
Fold 10 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


Some weights of the model checkpoint at mental/mental-bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['bert.pooler.dense.weight', 'bert

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Training loss: 88.0973056793213
Train Accuracy: 0.5463796477495108
Validation Accuracy: 0.5300353356890459
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Training loss: 79.55671691894531
Train Accuracy: 0.7553816046966731
Validation Accuracy: 0.7597173144876325
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Training loss: 76.2331771850586
Train Accuracy: 0.7886497064579256
Validation Accuracy: 0.8021201413427562
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Training loss: 74.74832878112792
Train Accuracy: 0.8250489236790607
Validation Accuracy: 0.8197879858657244
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Training loss: 73.90953750610352
Train Accuracy: 0.813307240704501
Validation Accuracy: 0.7985865724381626
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Training loss: 72.81532173156738
Train Accuracy: 0.8567514677103718
Validation Accuracy: 0.8268551236749117
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Training loss: 72.82294845581055
Train Accuracy: 0.8207436399217222
Validation Accuracy: 0.7703180212014135
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Training loss: 71.87208213806153
Train Accuracy: 0.8477495107632094
Validation Accuracy: 0.784452296819788
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Training loss: 70.89327354431153
Train Accuracy: 0.8908023483365949
Validation Accuracy: 0.8197879858657244
------------------------------


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Training loss: 70.11643142700196
Train Accuracy: 0.898238747553816
Validation Accuracy: 0.8056537102473498
------------------------------
------------------------------


  0%|          | 0/6 [00:00<?, ?it/s]

TEST Accuracy: 0.772027972027972
TEST Precision: 0.7942857142857143
TEST Recall: 0.7533875338753387
TEST F1_Score: 0.7732962447844228
..............................
Total Training Accuracy: 0.8089625314345174
Total Training Preciion: 0.8633048662864502
Total Training Recall: 0.7216656590995703
Total Training F1_Score: 0.7692203751798729
>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<
Total Validation Accuracy: 0.7612055193350918
Total Validation Precision: 0.8067677254914607
Total Validation Recall: 0.672692175080021
Total Validation F1_Score: 0.7198079129015065
>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<
Total Test Accuracy: 0.7783216783216782
Total Test Precision: 0.8131039897911225
Total Test Recall: 0.7425474254742548
Total Test F1_Score: 0.775321212241132
>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<>>><<<
