## A model build using TweetBERT to classify tweet as causal or non-causal 

In [1]:
import pandas as pd
import numpy as np
import spacy 
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification, AutoTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import random
import os
import torch.nn.functional as F
import torch
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
import transformers
from tqdm import tqdm, trange
#from google.colab import drive, files
import io

In [2]:
#uploaded = files.upload()
#data = pd.read_excel(io.BytesIO(uploaded['Causality + hypoglycemia.xlsx']), sheet_name=">5000_samples_")


# data = pd.read_excel("/home/adrian/Downloads/Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")
data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")

  data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")


In [3]:
print("Total count:", data.shape[0])
data = data[data["Causal association"].notnull()]
print("Labeled count:", data.shape[0])

data.head()

Total count: 5456
Labeled count: 4999


Unnamed: 0,id,text,full_text,Intent,Cause,Effect,Causal association,Charline association0=no;1=yes,Remarks
0,908171203029868545,"tonight , I learned my older girl will back he...","tonight , I learned my older girl will back he...",,,,0.0,,
1,1203645589214367745,USER USER I knew diabetes and fibromyalgia wer...,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0.0,,
2,1310596731063525376,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,mS,,,0.0,,
3,1125198453167022085,USER Cheers ! Have one for this diabetic too !,USER Cheers ! Have one for this diabetic too !,mS,,,0.0,,
4,1248600944138268673,USER Additionally the medicines are being char...,USER Additionally the medicines are being char...,,medicines are being charged at MRP,costing much higher,1.0,,


### Interrater-reliabilty measure

In [4]:
from sklearn.metrics import cohen_kappa_score

charline = data[data["Charline association0=no;1=yes"].notnull()]
coder1 = charline["Causal association"].values
coder2 = charline["Charline association0=no;1=yes"]
score = cohen_kappa_score(coder1,coder2)
print('Cohen\'s Kappa:',score)

Cohen's Kappa: 0.8184494602551521


### Preprocessing

In [5]:
data["Causal association"].value_counts()

0.0    3719
1.0    1280
Name: Causal association, dtype: int64

In [6]:
# Normalisation for BertTweet
from nltk.tokenize import TweetTokenizer
from emoji import demojize
import re

tokenizer = TweetTokenizer()

# https://huggingface.co/vinai/bertweet-base
def normalizeToken(token):
    lowercased_token = token.lower()
    if token.startswith("@"):
        return "@USER"
    elif lowercased_token.startswith("http") or lowercased_token.startswith("www"):
        return "HTTPURL"
    elif len(token) == 1:
        return demojize(token)
    else:
        if token == "’":
            return "'"
        elif token == "…":
            return "..."
        else:
            return token

def normalizeTweet(tweet):

    tokens = tokenizer.tokenize(tweet.replace("’", "'").replace("…", "..."))
    normTweet = " ".join([normalizeToken(token) for token in tokens])

    normTweet = normTweet.replace("cannot ", "can not ").replace("n't ", " n't ").replace("n 't ", " n't ").replace("ca n't", "can't").replace("ai n't", "ain't")
    normTweet = normTweet.replace("'m ", " 'm ").replace("'re ", " 're ").replace("'s ", " 's ").replace("'ll ", " 'll ").replace("'d ", " 'd ").replace("'ve ", " 've ")
    normTweet = normTweet.replace(" p . m .", "  p.m.") .replace(" p . m ", " p.m ").replace(" a . m .", " a.m.").replace(" a . m ", " a.m ")

    normTweet = re.sub(r",([0-9]{2,4}) , ([0-9]{2,4})", r",\1,\2", normTweet)
    normTweet = re.sub(r"([0-9]{1,3}) / ([0-9]{2,4})", r"\1/\2", normTweet)
    normTweet = re.sub(r"([0-9]{1,3})- ([0-9]{2,4})", r"\1-\2", normTweet)
    
    return " ".join(normTweet.split())

In [7]:
def split_into_sentences(text):
    text = " " + text + "  "
    text = text.replace("\n"," ")
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    sentences = text.split("<stop>")
    #sentences = sentences[:-1]
    sentences = [s.strip() for s in sentences]
    sentences = [s  for s in sentences if s != ""]
    return sentences


def create_training_data(data):
    tweets = []
    causal_labels = []
    
    for i, row in data.iterrows():
#        print("\n")
#        print(row["full_text"])
#        print(row["Intent"], "->", row["Causal association"])
       
        sentences = split_into_sentences(row["full_text"])
        intents = set(str(row["Intent"]).strip().split(";"))
        causes = str(row["Cause"]).strip().split(";")
        effects = str(row["Effect"]).strip().split(";")
#        print("\tintents:", intents)
#        print("\tcauses: '{}'".format(causes))
#        print("\teffects: '{}'".format(effects))
        
        if set({"nan"}) == intents or set({" "}) == intents: # single sentence
#            print("\tA: add => causality: {}".format(row["Causal association"]))
            tweets.append(row["full_text"])
            causal_labels.append(row["Causal association"])

        elif (
             set({"q"}) == intents 
          or set({"joke"}) == intents 
          or set({"q", "joke"}) == intents
          or set({"joke", "mS"}) == intents 
          or set({"neg"}) == intents 
          or set({"neg", "msS"}) == intents
          or set({"neg", "mS"}) == intents
          or set({"neg", "msS", "mE"}) == intents
          or set({"q", "joke", "mS"}) == intents
          or set({"q", "msS", "neg"}) == intents
          or set({"neg", "mC"}) == intents
          or set({"mC", "joke", "msS"}) == intents
          or set({"joke", "mE"}) == intents
        ):
#            print("\tB ignore")
            continue
        elif (  
             set({"mS"}) == intents # multiple sentences (possible that cause and effect in different sentences -> ignore)
          or set({"q", "mS"}) == intents # multiple sentences or question
          or set({"mS", "mE"}) == intents
          or set({"mC", "mS"}) == intents
          or set({"mC", "mS", "mE"}) == intents
          or set({"q", "mC", "mS"}) == intents
          or set({"q", "mC", "mS", "mE"}) == intents
            
        ):
            for sent in sentences:
#                print(sent)
                if sent[-1] != "?": # ignore questions
#                    print("\tC: add, causality => 0")
                    tweets.append(sent)
                    causal_labels.append(0)  
#                else:
#                    print("\tD: ignore")
        elif (
            set({"msS"}) == intents # multiple sentences with cause and effect in single sentence
         or set({"q", "msS"}) == intents # msS and a question
         or set({"msS", "mE"}) == intents # msS with several effects
         or set({"mC", "msS"}) == intents
         or set({"mE"}) == intents # multiple effects
         or set({"mC"}) == intents # multiple causes
         or set({"mC", "msS", "mE"}) == intents
         or set({"mC", "mE"}) == intents
         or set({"q", "mC", "mE"}) == intents   
         or set({"q", "mC", "msS"}) == intents
        ):

            if row["Causal association"] != 1: #TEST
                print(sentences)
                print("1) ERROR: Causal association should be 1 !!!!")      
                print()
        
            for sent in sentences:
#                print("sub sent:", sent)
                if sent[-1] != "?": # ignore question
                    
                    existCause = False
                    for cause in causes:
                        if cause in sent:
#                            print("Cause: <{}> in sent".format(cause))
                            existCause = True
                    
                    existEffect = False
                    for effect in effects:
                        if effect in sent:
#                            print("Effect: <{}> in sent".format(effect))
                            existEffect = True
                            
                    if existCause and existEffect:
                        tweets.append(sent)
                        causal_labels.append(row["Causal association"])
#                        print("E: add with Cause + effect => association: {}".format(row["Causal association"]))
                    else:
                        tweets.append(sent)
                        causal_labels.append(0)
#                        print("F: not both C + E in sentence, association: 0")
#                else:
#                    print("H: question in sentence")
            if row["Causal association"] == 0:
                print(sentences)
                print("H: should not have causality == 0")
#        else:
#            print("END: should not happen")

    
    return pd.DataFrame({"tweet" : tweets, "Causal association" : causal_labels})

trainingData = create_training_data(data)
print("N sentences:", trainingData.shape)
trainingData = trainingData[trainingData["tweet"].str.split(" ").str.len() > 3] # keep only sentence with more than 3 tokens
print("N sentences with > 3 words:", trainingData.shape)

N sentences: (9775, 2)
N sentences with > 3 words: (8232, 2)


In [8]:
# let's print a few example of our cleaned and normalized traingin dataset
trainingData.head()

Unnamed: 0,tweet,Causal association
0,"tonight , I learned my older girl will back he...",0.0
1,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,0.0
2,I'm a trans woman .,0.0
3,"Both of us could use a world where "" brave and...",0.0
4,"Make a world where people can just be , withou...",0.0


In [9]:
trainingData["Causal association"].value_counts()

0.0    7219
1.0    1013
Name: Causal association, dtype: int64

### Training

In [10]:
text = trainingData["tweet"].map(normalizeTweet).values.tolist()
labels = trainingData["Causal association"].values.tolist()
# first split the data into traingin and testing label in the ratio of 80:20
train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2)
# split the new training data (80% of actual data) to get train and validation set
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2)



print("Train: {}".format(len(train_texts)))
print("Val: {}".format(len(val_texts)))
print("Test: {}".format(len(test_texts)))


Train: 5268
Val: 1317
Test: 1647


In [11]:
# train_texts[1]

In [12]:
# Transform labels + encodings into Pytorch DataSet object (including __len__, __getitem__)
class TweetDataSet(torch.utils.data.Dataset):
    def __init__(self, text, labels, tokenizer):
        self.text = text
        self.labels = labels
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        inputs = self.tokenizer(self.text, padding=True, truncation=True, return_token_type_ids=True)
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]
        return {
                "input_ids" : torch.tensor(ids[idx], dtype=torch.long)
              , "attention_mask" : torch.tensor(mask[idx], dtype=torch.long)
              , "token_type_ids" : torch.tensor(token_type_ids[idx], dtype=torch.long)
              , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
        }      

    def __len__(self):
        return len(self.labels)

    
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")

train_dataset = TweetDataSet(train_texts, train_labels, tokenizer)
val_dataset = TweetDataSet(val_texts, val_labels, tokenizer)
test_dataset = TweetDataSet(test_texts, test_labels, tokenizer)
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))

# put data to batches (iterables)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


5268
1317
1647


In [13]:
# !jupyter nbextension enable --py widgetsnbextension

In [14]:
# # from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
# # from transformers import AutoModelForSequenceClassification
# from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef

# def compute_metrics(pred, labels):
#     #labels = pred.label_ids
#     #preds = pred.predictions.argmax(-1)
#     precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='binary')
#     acc = accuracy_score(labels, pred)
#     return {
#         'accuracy': acc,
#         'f1': f1,
#         'precision': precision,
#         'recall': recall
#     }



# class CausalityBERT(torch.nn.Module):
#     """ Model Bert"""
#     def __init__(self):
#         super(CausalityBERT, self).__init__()
#         self.num_labels = 2
#         self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
#         self.dropout = torch.nn.Dropout(0.3)
#         self.linear = torch.nn.Linear(768, self.num_labels)
#         # softmax layer missing ? -> Vivek
        
#     def forward(self, input_ids, attention_mask, token_type_ids):
#         _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False)
#         output_2 = self.dropout(output_1)
#         logits = self.linear(output_2)
#         return logits


# ## Model parameters
# batchsize_train = 16
# lr = 5e-5
# adam_eps = 1e-8
# epochs = 3 
# num_warmup_steps = 0
# num_training_steps = len(train_loader)*epochs

# # Store our loss and accuracy for plotting
# train_loss_set = []
# learning_rate = []


# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# #model = AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base")
# model = CausalityBERT()
# model.to(device)

# # fine-tune only the task-specific parameters -> Vivek? 
# for param in model.bert.parameters():
#     param.requires_grad = False
    
# model.to(device)
# model.train() # set model to training mode


# optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
# scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) # scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0; after a warmup period durnig which it increases linearly from 0 to the initial lr set in the optimizer
# loss_fn = CrossEntropyLoss()

# for epoch in trange(1, epochs+1, desc='Epoch'):
#     print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")
    
#     #calculate total loss for this epoch
#     batch_loss = 0
    
#     for batch in tqdm(train_loader):
#         #print("batch:", batch)
#         optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
#         input_ids = batch['input_ids'].to(device)
#         attention_mask = batch['attention_mask'].to(device)
#         token_type_ids = batch["token_type_ids"].to(device)
#         labels = batch['labels'].to(device)
#         #logits = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # forward pass
#         logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        
        
        
        
#         loss = loss_fn(logits, labels)
#         print("loss:", loss)
#         #loss = outputs[0]
#         loss.backward() # backward pass
#         optim.step()    # update parameters and take a step up using the computed gradient
#         scheduler.step()# update learning rate scheduler
#         batch_loss += loss.item() # update tracking variables
        
#     avg_train_loss = batch_loss / len(train_loader) # calculate avg loss over training data

#     # store the current learning rate
#     for param_group in optim.param_groups:
#         print("\n\tCurrent Learning rate: ", param_group['lr'])
#         learning_rate.append(param_group['lr'])
    
#     train_loss_set.append(avg_train_loss)
#     print(F'\n\tAverage Training loss: {avg_train_loss}')

    
#     ## ---- Validation ------
#     model.eval() # put model in evaluation mode for validation set
    
#     eval_accuracy, eval_mcc_accuracy, nb_eval_steps = 0, 0, 0 # Tracking variables
#     val_accuracy = []
#     val_loss = []
#     val_acc = []
#     val_prec = []
#     val_rec = []
#     val_f1 = []
    
    
#     # Evaluate data for one epoch
#     for batch in tqdm(validation_loader):
#         batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
#         b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
        
#         with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
#           #  logits = model(b_inputs_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids) # forward pass, calculates logit predictions
#           logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions
            
#         loss = loss_fn(logits, b_labels)
#         val_loss.append(loss.item())
        
#         # move logits and labels to CPU
#         logits = logits.detach().to('cpu').numpy()
#         label_ids = b_labels.to('cpu').numpy()
        
#         pred_flat = np.argmax(logits, axis=1).flatten() # convert logits to list of predicted labels
#         labels_flat = label_ids.flatten()
#         eval_accuracy += accuracy_score(labels_flat, pred_flat)
#         eval_mcc_accrucay += matthews_corrcoef(labels_flat, pred_flat)        
#         metrics = compute_metrics(pred_flat, labels_flat)
#         val_acc.append(metrics["accuracy"])
#         val_prec.append(metrics["precision"])
#         val_rec.append(metrics["recall"])
#         val_f1.append(metrics["f1"])
#         nb_eval_steps += 1
        
#     print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation MCC Accuracy: {eval_mcc_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation loss: {np.mean(val_loss)}')
#     print(F'\n\tValidation acc: {np.mean(val_acc)}')
#     print(F'\n\tValidation prec: {np.mean(val_prec)}')
#     print(F'\n\tValidation rec: {np.mean(val_rec)}')
#     print(F'\n\tValidation f1: {np.mean(val_f1)}')

    


In [15]:
# from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
# from transformers import AutoModelForSequenceClassification

## we are measuring weighted metrics - as our dataset is unbalanced 
# Calculate metrics for each label, and find their average weighted by support
#(the number of true instances for each label). 
#This alters ‘macro’ to account for label imbalance; 
# it can result in an F-score that is not between precision and recall.


from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef

def compute_metrics(pred, labels):
    #labels = pred.label_ids
    #preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='weighted')
    acc = accuracy_score(labels, pred)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }



class CausalityBERT(torch.nn.Module):
    """ Model Bert"""
    def __init__(self):
        super(CausalityBERT, self).__init__()
        self.num_labels = 2
        self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
        self.dropout = torch.nn.Dropout(0.3)
        self.linear1 = torch.nn.Linear(768, 256)
        self.linear2 = torch.nn.Linear(256, self.num_labels)
        self.softmax = torch.nn.Softmax(-1)
        
    def forward(self, input_ids, attention_mask, token_type_ids):
        _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False) # if output 1 is our cls token        
        output_2 = self.dropout(output_1)
        output_3 = self.linear1(output_2)  
        output_4 = self.dropout(output_3)
        output_5 = self.linear2(output_4)
        logit = self.softmax(output_5)
        return logit



    


In [16]:
# help(torch.nn.Softmax)

In [17]:
## Model parameters
batchsize_train = 16
lr = 5e-5
adam_eps = 1e-8
epochs = 35
num_warmup_steps = 0
num_training_steps = len(train_loader)*epochs

# Store our loss and accuracy for plotting : where is accuracy : it is loss an dlearning rate 
train_loss_set = []
learning_rate = []


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = CausalityBERT()
model.to(device)

# fine-tune only the task-specific parameters -> Vivek? 
for param in model.bert.parameters():
    param.requires_grad = False


optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) # scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0; after a warmup period durnig which it increases linearly from 0 to the initial lr set in the optimizer
loss_fn = CrossEntropyLoss()



You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.0.attention.self.key.weight', 'roberta.encoder.layer.11.attention.self.key.bias', 'roberta.encoder.layer.6.attention.output.LayerNorm.weight', 'roberta.encoder.layer.7.intermediate.dense.bias', 'roberta.encoder.layer.4.attention.self.key.bias', 'roberta.encoder.layer.7.attention.self.value.bias', 'roberta.encoder.layer.8.attention.self.query.bias', 'roberta.encoder.layer.8.attention.self.value.weight', 'roberta.encoder.layer.9.intermediate.dense.weight', 'lm_head.bias', 'roberta.encoder.layer.8.intermediate.dense.weight', 'roberta.encoder.layer.11.attention.self.value.weight', 'roberta.encoder.layer.9.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.7.at

In [32]:



for epoch in trange(1, epochs+1, desc='Epoch'):
    print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")
    
    #calculate total loss for this epoch
#     epoch_loss = 0
    
    ########### training eval metrics #############################
    tr_accuracy, tr_mcc_accuracy, nb_tr_steps = 0, 0, 0 # Tracking variables
    train_accuracy = []
    train_loss = []
    train_acc = []
    train_prec = []
    train_rec = []
    train_f1 = []
    
    ###################################################
    
    for batch in tqdm(train_loader):
        #print("batch:", batch)
        optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch["token_type_ids"].to(device)
        labels = batch['labels'].to(device)
        #logits = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # forward pass
#         outputs = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        ###########################################################################
        model.train()
        logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        #############################################################################
        loss = loss_fn(logits, labels)
        print("loss:", loss)
        #loss = outputs[0]
        loss.backward() # backward pass
        optim.step()    # update parameters and take a step up using the computed gradient
        scheduler.step()# update learning rate scheduler
#         epoch_loss += loss.item() # update tracking variables
        train_loss.append(loss.item())
        
    
    ############# Training Accuracy Measure ##########################################
#         loss = loss_fn(logits, b_labels)
#         val_loss.append(loss.item())

        # move logits and labels to CPU
        logits = logits.detach().to('cpu').numpy()
        label_ids = labels.to('cpu').numpy()

        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()

        tr_accuracy += accuracy_score(labels_flat, pred_flat) # this is same as metric accuracy 
        tr_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  


        metrics = compute_metrics(pred_flat, labels_flat)
#         print(metrics)
        
        
#         train_loss.append(loss)
        train_acc.append(metrics["accuracy"])
        train_prec.append(metrics["precision"])
        train_rec.append(metrics["recall"])
        train_f1.append(metrics["f1"])
        
#         batch_loss
        nb_tr_steps += 1
        
#     batch_loss = np.mean(train_loss)
#     train_loss_set.append(batch_loss)
        
    
#     print(F'\n\tTraining Accuracy: {tr_accuracy / nb_tr_steps}') # accuracy is calculated twice 
    print(F'\n\tTraining MCC Accuracy: {tr_mcc_accuracy / nb_tr_steps}') # eval M
    
#     train_loss_set.append(batch_loss) # it should be either loss or batch_loss
    
    
#     train_loss = train_loss.to('cpu').numpy()
    print(F'\n\tTrain loss: {np.mean(train_loss)}')
    print(F'\n\ttrain acc: {np.mean(train_acc)}')
    print(F'\n\ttraining prec: {np.mean(train_prec)}')
    print(F'\n\ttraining rec: {np.mean(train_rec)}')
    print(F'\n\ttraining f1: {np.mean(train_f1)}')
    
    
    ###################################################################################
    #     avg_train_loss = batch_loss / len(train_loader) # calculate avg loss over training data : we don't need it as you did it for each sample
    #     train_loss_set.append(avg_train_loss)
    #     print(F'\n\tAverage Training loss: {avg_train_loss}')
    
#     train_loss_set.append(train_loss)
#     print(F'\n\tAverage Training loss per epoch: {train_loss}')


    # store the current learning rate
    for param_group in optim.param_groups:
        print("\n\tCurrent Learning rate: ", param_group['lr'])
        learning_rate.append(param_group['lr'])
    


    
    ## ---- Validation ------
#     model.eval() # put model in evaluation mode for validation set
    
    eval_accuracy, eval_mcc_accuracy, nb_eval_steps = 0, 0, 0 # Tracking variables
    val_accuracy = []
    val_loss = []
    val_acc = []
    val_prec = []
    val_rec = []
    val_f1 = []
    
    
    # Evaluate data for one epoch
    for batch in tqdm(validation_loader):
        batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
        b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
        
        with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
          #  logits = model(b_inputs_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids) # forward pass, calculates logit predictions
#             outputs = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
            ##################################################################################
            model.eval()
            logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 

                        
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())
        
        # move logits and labels to CPU
        logits = logits.detach().to('cpu').numpy()
        label_ids = b_labels.to('cpu').numpy()
        
        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()
        
        eval_accuracy += accuracy_score(labels_flat, pred_flat)
        eval_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  
        
        
        metrics = compute_metrics(pred_flat, labels_flat)
        val_acc.append(metrics["accuracy"])
        val_prec.append(metrics["precision"])
        val_rec.append(metrics["recall"])
        val_f1.append(metrics["f1"])
        nb_eval_steps += 1
        
#     print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
    print(F'\n\tValidation MCC Accuracy: {eval_mcc_accuracy / nb_eval_steps}') # eval M
    
#     val_loss = val_loss.to('cpu').numpy()
    print(F'\n\tValidation loss: {np.mean(val_loss)}')
    print(F'\n\tValidation acc: {np.mean(val_acc)}')
    print(F'\n\tValidation prec: {np.mean(val_prec)}')
    print(F'\n\tValidation rec: {np.mean(val_rec)}')
    print(F'\n\tValidation f1: {np.mean(val_f1)}')


Epoch:   0%|          | 0/35 [00:00<?, ?it/s]
  0%|          | 0/83 [00:00<?, ?it/s][A



  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)


loss: 


  1%|          | 1/83 [00:22<30:57, 22.66s/it][A

tensor(0.4551, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:44<30:19, 22.47s/it][A

tensor(0.3939, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:06<29:36, 22.21s/it][A

tensor(0.4251, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:28<29:06, 22.11s/it][A

tensor(0.3625, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:50<28:39, 22.04s/it][A

tensor(0.3782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:12<28:11, 21.97s/it][A

tensor(0.3781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:34<27:46, 21.93s/it][A

tensor(0.4706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:56<27:23, 21.91s/it][A

tensor(0.4245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:18<26:59, 21.88s/it][A

tensor(0.3317, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:39<26:33, 21.83s/it][A

tensor(0.4090, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:01<26:14, 21.86s/it][A

tensor(0.4245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:23<25:50, 21.84s/it][A

tensor(0.4867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:45<25:29, 21.84s/it][A

tensor(0.4397, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:07<25:09, 21.88s/it][A

tensor(0.4862, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:29<24:44, 21.84s/it][A

tensor(0.4863, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:51<24:25, 21.87s/it][A

tensor(0.4088, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:12<24:01, 21.84s/it][A

tensor(0.4091, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:34<23:35, 21.78s/it][A

tensor(0.3779, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:56<23:15, 21.80s/it][A

tensor(0.3935, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:18<22:53, 21.80s/it][A

tensor(0.4710, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:39<22:29, 21.77s/it][A

tensor(0.3935, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [08:01<22:10, 21.81s/it][A

tensor(0.5014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:23<21:48, 21.80s/it][A

tensor(0.4399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:45<21:28, 21.83s/it][A

tensor(0.4707, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:07<21:06, 21.83s/it][A

tensor(0.3932, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:28<20:42, 21.79s/it][A

tensor(0.4242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:50<20:21, 21.81s/it][A

tensor(0.4240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:12<19:59, 21.81s/it][A

tensor(0.3777, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:34<19:35, 21.77s/it][A

tensor(0.4547, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:56<19:12, 21.75s/it][A

tensor(0.4707, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:17<18:48, 21.70s/it][A

tensor(0.4706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:39<18:22, 21.63s/it][A

tensor(0.5013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [12:00<17:59, 21.58s/it][A

tensor(0.4551, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:21<17:35, 21.54s/it][A

tensor(0.4395, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:43<17:13, 21.53s/it][A

tensor(0.4085, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [13:04<16:51, 21.53s/it][A

tensor(0.4396, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:26<16:28, 21.48s/it][A

tensor(0.4396, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:47<16:06, 21.47s/it][A

tensor(0.4394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:09<15:45, 21.48s/it][A

tensor(0.4552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:30<15:22, 21.45s/it][A

tensor(0.4549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:52<15:02, 21.48s/it][A

tensor(0.4084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:13<14:39, 21.45s/it][A

tensor(0.4239, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:35<14:19, 21.49s/it][A

tensor(0.4394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:56<13:58, 21.51s/it][A

tensor(0.4086, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:18<13:37, 21.52s/it][A

tensor(0.4550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:39<13:17, 21.54s/it][A

tensor(0.3775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [17:01<12:56, 21.56s/it][A

tensor(0.4394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:22<12:33, 21.52s/it][A

tensor(0.4547, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:44<12:12, 21.55s/it][A

tensor(0.4394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [18:06<11:51, 21.56s/it][A

tensor(0.5012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:27<11:28, 21.51s/it][A

tensor(0.4392, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:48<11:06, 21.49s/it][A

tensor(0.5014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:10<10:43, 21.46s/it][A

tensor(0.4082, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:31<10:22, 21.48s/it][A

tensor(0.4704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:53<10:01, 21.47s/it][A

tensor(0.4549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:14<09:39, 21.45s/it][A

tensor(0.3619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:36<09:18, 21.47s/it][A

tensor(0.4860, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:57<08:56, 21.47s/it][A

tensor(0.4085, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:19<08:34, 21.46s/it][A

tensor(0.4084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:40<08:13, 21.47s/it][A

tensor(0.4702, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [22:02<07:51, 21.44s/it][A

tensor(0.4702, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:23<07:30, 21.46s/it][A

tensor(0.4703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:45<07:09, 21.47s/it][A

tensor(0.5014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:06<06:48, 21.49s/it][A

tensor(0.4240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:28<06:27, 21.51s/it][A

tensor(0.3928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:49<06:05, 21.51s/it][A

tensor(0.5013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:11<05:43, 21.48s/it][A

tensor(0.4239, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:32<05:22, 21.49s/it][A

tensor(0.3928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:53<05:00, 21.46s/it][A

tensor(0.4393, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:15<04:39, 21.54s/it][A

tensor(0.3774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:37<04:19, 21.61s/it][A

tensor(0.4086, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:58<03:57, 21.56s/it][A

tensor(0.4551, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:20<03:35, 21.56s/it][A

tensor(0.3770, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:41<03:14, 21.56s/it][A

tensor(0.4393, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [27:03<02:52, 21.54s/it][A

tensor(0.4703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:25<02:30, 21.56s/it][A

tensor(0.5322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:46<02:09, 21.56s/it][A

tensor(0.4550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:08<01:47, 21.56s/it][A

tensor(0.4703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:29<01:26, 21.55s/it][A

tensor(0.4702, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:51<01:04, 21.56s/it][A

tensor(0.4236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:12<00:43, 21.54s/it][A

tensor(0.4701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:34<00:21, 21.57s/it][A

tensor(0.4393, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:41<00:00, 21.46s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4143, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43664293727242803

	train acc: 0.8779367469879519

	training prec: 0.7723719879518073

	training rec: 0.8779367469879519

	training f1: 0.821354314129638

	Current Learning rate:  4.5714285714285716e-05



  2%|▏         | 1/42 [00:02<01:48,  2.65s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.63s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:37,  2.57s/it][A
 12%|█▏        | 5/42 [00:12<01:35,  2.59s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.60s/it][A
 17%|█▋        | 7/42 [00:18<01:29,  2.56s/it][A
 19%|█▉        | 8/42 [00:20<01:27,  2.58s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.59s/it][A
 24%|██▍       | 10/42 [00:25<01:23,  2.60s/it][A
 26%|██▌       | 11/42 [00:28<01:19,  2.57s/it][A
 29%|██▊       | 12/42 [00:31<01:17,  2.58s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.59s/it][A
 33%|███▎      | 14/42 [00:36<01:11,  2.56s/it][A
 36%|███▌      | 15/42 [00:38<01:09,  2.58s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.59s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.56s/it][A
 43%|████▎     | 18/42 [00:46<01:01,  2.57s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.59s/it][A
 48%|████▊     | 20/42 [00:51<00:56,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.42598203888961245

	Validation acc: 0.8876488095238095

	Validation prec: 0.7915504092261905

	Validation rec: 0.8876488095238095

	Validation f1: 0.8358973051824441
loss: 


  1%|          | 1/83 [00:21<29:33, 21.63s/it][A

tensor(0.3771, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:04, 21.53s/it][A

tensor(0.4391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:42, 21.53s/it][A

tensor(0.3773, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:21, 21.54s/it][A

tensor(0.4547, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:47<28:04, 21.60s/it][A

tensor(0.4392, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:09<27:46, 21.64s/it][A

tensor(0.4082, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:25, 21.65s/it][A

tensor(0.4081, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:53<27:08, 21.71s/it][A

tensor(0.4083, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:14<26:48, 21.74s/it][A

tensor(0.4236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:36<26:26, 21.73s/it][A

tensor(0.4236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:58<26:05, 21.74s/it][A

tensor(0.4390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:20<25:46, 21.78s/it][A

tensor(0.5012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:41<25:22, 21.75s/it][A

tensor(0.4705, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:03<25:03, 21.79s/it][A

tensor(0.3924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:25<24:40, 21.78s/it][A

tensor(0.5011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:47<24:18, 21.77s/it][A

tensor(0.4236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:09<23:57, 21.78s/it][A

tensor(0.4081, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:30<23:34, 21.76s/it][A

tensor(0.4393, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:52<23:14, 21.79s/it][A

tensor(0.4393, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:14<22:54, 21.82s/it][A

tensor(0.4547, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:36<22:30, 21.77s/it][A

tensor(0.3928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:58<22:08, 21.78s/it][A

tensor(0.3925, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:19<21:48, 21.81s/it][A

tensor(0.4236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:41<21:26, 21.80s/it][A

tensor(0.4702, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:03<21:05, 21.81s/it][A

tensor(0.4700, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:25<20:41, 21.78s/it][A

tensor(0.4391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:47<20:21, 21.81s/it][A

tensor(0.3614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:08<19:59, 21.81s/it][A

tensor(0.5479, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:30<19:36, 21.78s/it][A

tensor(0.4080, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:52<19:15, 21.80s/it][A

tensor(0.4391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:14<18:54, 21.81s/it][A

tensor(0.4391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:36<18:31, 21.79s/it][A

tensor(0.4858, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:57<18:05, 21.71s/it][A

tensor(0.4391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:18<17:37, 21.59s/it][A

tensor(0.3769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:40<17:13, 21.54s/it][A

tensor(0.4237, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [13:01<16:49, 21.48s/it][A

tensor(0.3614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:22<16:24, 21.40s/it][A

tensor(0.3768, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:44<16:02, 21.40s/it][A

tensor(0.4080, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:05<15:40, 21.38s/it][A

tensor(0.3925, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:26<15:17, 21.33s/it][A

tensor(0.3924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:48<14:55, 21.31s/it][A

tensor(0.4544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:09<14:34, 21.33s/it][A

tensor(0.4702, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:30<14:11, 21.28s/it][A

tensor(0.4701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:51<13:49, 21.28s/it][A

tensor(0.4390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:13<13:28, 21.27s/it][A

tensor(0.4235, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:34<13:06, 21.25s/it][A

tensor(0.4079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:55<12:45, 21.26s/it][A

tensor(0.4546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:16<12:23, 21.24s/it][A

tensor(0.3923, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:38<12:02, 21.26s/it][A

tensor(0.4702, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:59<11:41, 21.26s/it][A

tensor(0.5013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:20<11:19, 21.25s/it][A

tensor(0.4855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:41<10:59, 21.27s/it][A

tensor(0.4234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:03<10:37, 21.26s/it][A

tensor(0.3923, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:24<10:15, 21.24s/it][A

tensor(0.5324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:45<09:55, 21.26s/it][A

tensor(0.4388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:07<09:35, 21.30s/it][A

tensor(0.4389, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:28<09:13, 21.29s/it][A

tensor(0.4546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:49<08:52, 21.29s/it][A

tensor(0.5012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:10<08:30, 21.27s/it][A

tensor(0.4545, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:32<08:10, 21.32s/it][A

tensor(0.4391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:53<07:48, 21.31s/it][A

tensor(0.4079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:14<07:27, 21.29s/it][A

tensor(0.3922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:36<07:06, 21.33s/it][A

tensor(0.4235, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:57<06:45, 21.34s/it][A

tensor(0.4546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:18<06:23, 21.30s/it][A

tensor(0.4546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:40<06:01, 21.28s/it][A

tensor(0.5167, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:01<05:40, 21.30s/it][A

tensor(0.4389, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:22<05:19, 21.28s/it][A

tensor(0.4390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:43<04:58, 21.31s/it][A

tensor(0.4856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:05<04:36, 21.30s/it][A

tensor(0.3922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:26<04:15, 21.33s/it][A

tensor(0.4079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:48<03:54, 21.34s/it][A

tensor(0.4543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:09<03:33, 21.31s/it][A

tensor(0.4855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:30<03:11, 21.33s/it][A

tensor(0.4233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:51<02:50, 21.33s/it][A

tensor(0.4233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:13<02:29, 21.29s/it][A

tensor(0.4856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:34<02:08, 21.34s/it][A

tensor(0.4700, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:55<01:46, 21.31s/it][A

tensor(0.3923, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:17<01:25, 21.35s/it][A

tensor(0.5012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:38<01:04, 21.37s/it][A

tensor(0.3921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:00<00:42, 21.35s/it][A

tensor(0.4077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:21<00:21, 21.39s/it][A

tensor(0.4077, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:28<00:00, 21.30s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4637, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.436542335763035

	train acc: 0.8775225903614458

	training prec: 0.7716325065888554

	training rec: 0.8775225903614458

	training f1: 0.820762977468252

	Current Learning rate:  4.428571428571428e-05



  2%|▏         | 1/42 [00:02<01:42,  2.51s/it][A
  5%|▍         | 2/42 [00:05<01:42,  2.57s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.60s/it][A
 10%|▉         | 4/42 [00:10<01:37,  2.56s/it][A
 12%|█▏        | 5/42 [00:12<01:35,  2.58s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.59s/it][A
 17%|█▋        | 7/42 [00:18<01:30,  2.60s/it][A
 19%|█▉        | 8/42 [00:20<01:27,  2.58s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.60s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:28<01:23,  2.68s/it][A
 29%|██▊       | 12/42 [00:31<01:23,  2.78s/it][A
 31%|███       | 13/42 [00:34<01:20,  2.78s/it][A
 33%|███▎      | 14/42 [00:37<01:15,  2.70s/it][A
 36%|███▌      | 15/42 [00:39<01:12,  2.68s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.67s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:05,  2.73s/it][A
 45%|████▌     | 19/42 [00:50<01:02,  2.72s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4258330734003158

	Validation acc: 0.8876488095238095

	Validation prec: 0.7922014508928571

	Validation rec: 0.8876488095238095

	Validation f1: 0.8361235417513007
loss: 


  1%|          | 1/83 [00:21<29:32, 21.62s/it][A

tensor(0.5480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:01, 21.49s/it][A

tensor(0.4078, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:05<29:02, 21.78s/it][A

tensor(0.4388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:37, 21.73s/it][A

tensor(0.3921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:47<27:54, 21.47s/it][A

tensor(0.4856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:08<27:23, 21.35s/it][A

tensor(0.4388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:30<26:56, 21.27s/it][A

tensor(0.5012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:51<26:28, 21.18s/it][A

tensor(0.4701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:12<26:18, 21.33s/it][A

tensor(0.4077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:33<25:55, 21.31s/it][A

tensor(0.4233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:55<25:33, 21.30s/it][A

tensor(0.3922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:16<25:11, 21.29s/it][A

tensor(0.4544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:37<24:48, 21.26s/it][A

tensor(0.4389, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:59<24:39, 21.44s/it][A

tensor(0.4389, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:21<24:24, 21.54s/it][A

tensor(0.4078, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:43<24:07, 21.60s/it][A

tensor(0.4078, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:04<23:50, 21.68s/it][A

tensor(0.3922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:26<23:32, 21.73s/it][A

tensor(0.4233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:48<23:10, 21.73s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:10<22:50, 21.76s/it][A

tensor(0.5322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:32<22:28, 21.75s/it][A

tensor(0.5011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:53<22:08, 21.77s/it][A

tensor(0.4545, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:15<21:48, 21.80s/it][A

tensor(0.4077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:37<21:26, 21.81s/it][A

tensor(0.4233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:59<21:04, 21.80s/it][A

tensor(0.4388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:20<20:28, 21.56s/it][A

tensor(0.4234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:41<19:55, 21.35s/it][A

tensor(0.3921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:02<19:28, 21.25s/it][A

tensor(0.4232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:23<19:03, 21.17s/it][A

tensor(0.3765, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:44<18:38, 21.11s/it][A

tensor(0.4388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:05<18:18, 21.13s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:26<17:56, 21.10s/it][A

tensor(0.4388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:47<17:35, 21.11s/it][A

tensor(0.4700, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:08<17:14, 21.12s/it][A

tensor(0.4544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:29<16:53, 21.11s/it][A

tensor(0.4544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:50<16:34, 21.16s/it][A

tensor(0.4232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:12<16:14, 21.19s/it][A

tensor(0.4388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:33<15:51, 21.15s/it][A

tensor(0.4699, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:54<15:33, 21.21s/it][A

tensor(0.5011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:15<15:11, 21.20s/it][A

tensor(0.3921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:37<14:52, 21.26s/it][A

tensor(0.4856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:58<14:32, 21.27s/it][A

tensor(0.4389, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:19<14:10, 21.25s/it][A

tensor(0.5166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:41<13:49, 21.27s/it][A

tensor(0.4543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:02<13:28, 21.28s/it][A

tensor(0.4232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:23<13:06, 21.26s/it][A

tensor(0.4076, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:44<12:46, 21.30s/it][A

tensor(0.4232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:06<12:25, 21.30s/it][A

tensor(0.3453, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:27<12:03, 21.28s/it][A

tensor(0.3610, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:48<11:42, 21.30s/it][A

tensor(0.4234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:10<11:20, 21.27s/it][A

tensor(0.4077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:31<11:00, 21.30s/it][A

tensor(0.4077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:52<10:39, 21.31s/it][A

tensor(0.5011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:14<10:17, 21.29s/it][A

tensor(0.4077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:35<09:57, 21.32s/it][A

tensor(0.3920, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:56<09:35, 21.32s/it][A

tensor(0.4388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:17<09:13, 21.29s/it][A

tensor(0.3609, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:39<08:52, 21.29s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:00<08:33, 21.40s/it][A

tensor(0.4232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:22<08:13, 21.47s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:44<07:54, 21.58s/it][A

tensor(0.3920, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:06<07:33, 21.60s/it][A

tensor(0.4233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:27<07:13, 21.68s/it][A

tensor(0.4388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:49<06:52, 21.70s/it][A

tensor(0.4854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:11<06:30, 21.71s/it][A

tensor(0.3919, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:33<06:08, 21.70s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:54<05:45, 21.62s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:16<05:25, 21.67s/it][A

tensor(0.4856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:38<05:04, 21.74s/it][A

tensor(0.4855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:00<04:45, 21.96s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:23<04:25, 22.13s/it][A

tensor(0.3608, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:45<04:04, 22.21s/it][A

tensor(0.4699, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:07<03:41, 22.17s/it][A

tensor(0.4232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:29<03:19, 22.16s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:51<02:56, 22.09s/it][A

tensor(0.5323, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:13<02:33, 21.97s/it][A

tensor(0.4544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:35<02:11, 21.96s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:58<01:51, 22.32s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:21<01:30, 22.57s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:43<01:07, 22.47s/it][A

tensor(0.3919, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:05<00:44, 22.30s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:27<00:22, 22.17s/it][A

tensor(0.4077, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:34<00:00, 21.38s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4636, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43628564584686097

	train acc: 0.8775225903614458

	training prec: 0.7716501553087349

	training rec: 0.8775225903614458

	training f1: 0.8207687677315189

	Current Learning rate:  4.2857142857142856e-05



  2%|▏         | 1/42 [00:02<01:49,  2.67s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.64s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.66s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.63s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.64s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.65s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.65s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.64s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.66s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4257577430634272

	Validation acc: 0.8876488095238095

	Validation prec: 0.7914574032738095

	Validation rec: 0.8876488095238095

	Validation f1: 0.8358706803231961
loss: 


  1%|          | 1/83 [00:21<29:51, 21.85s/it][A

tensor(0.4230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:21, 21.74s/it][A

tensor(0.4232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:05<29:02, 21.78s/it][A

tensor(0.3919, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:27<28:41, 21.79s/it][A

tensor(0.3919, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:15, 21.74s/it][A

tensor(0.3921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:10<27:48, 21.67s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:07, 21.41s/it][A

tensor(0.3763, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:52<26:31, 21.22s/it][A

tensor(0.4543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:12<26:03, 21.12s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:33<25:35, 21.04s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:54<25:08, 20.95s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:15<24:47, 20.95s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:36<24:24, 20.93s/it][A

tensor(0.4854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:57<24:05, 20.95s/it][A

tensor(0.4855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:18<23:46, 20.98s/it][A

tensor(0.4855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:39<23:24, 20.96s/it][A

tensor(0.4855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:00<23:05, 20.99s/it][A

tensor(0.4075, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:21<22:45, 21.00s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:42<22:24, 21.01s/it][A

tensor(0.4543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:03<22:04, 21.03s/it][A

tensor(0.5322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:24<21:41, 20.99s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:45<21:21, 21.01s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:06<21:00, 21.02s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:27<20:37, 20.98s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:48<20:17, 21.00s/it][A

tensor(0.4543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:09<19:57, 21.00s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:30<19:34, 20.98s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:51<19:14, 21.00s/it][A

tensor(0.3763, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:12<18:53, 20.99s/it][A

tensor(0.3451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:33<18:33, 21.01s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:54<18:11, 20.99s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:15<17:48, 20.95s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:36<17:29, 20.99s/it][A

tensor(0.3919, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:57<17:08, 20.99s/it][A

tensor(0.4854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:18<16:46, 20.96s/it][A

tensor(0.4543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:39<16:26, 20.98s/it][A

tensor(0.5166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:00<16:05, 20.99s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:21<15:43, 20.96s/it][A

tensor(0.4699, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:42<15:23, 20.98s/it][A

tensor(0.3763, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:03<15:03, 21.01s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:24<14:40, 20.97s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:45<14:20, 20.99s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:06<13:58, 20.97s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:27<13:38, 20.99s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:48<13:17, 20.99s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:08<12:55, 20.95s/it][A

tensor(0.5166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:30<12:37, 21.03s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:51<12:17, 21.06s/it][A

tensor(0.4699, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:12<11:55, 21.06s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:33<11:36, 21.11s/it][A

tensor(0.4854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:54<11:16, 21.14s/it][A

tensor(0.4543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:16<11:00, 21.31s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:38<10:43, 21.45s/it][A

tensor(0.3607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:00<10:24, 21.54s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:21<10:05, 21.63s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:43<09:44, 21.66s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:05<09:23, 21.66s/it][A

tensor(0.4075, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:26<09:01, 21.65s/it][A

tensor(0.4543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:48<08:40, 21.67s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:10<08:19, 21.70s/it][A

tensor(0.3919, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:32<07:57, 21.72s/it][A

tensor(0.5010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:53<07:32, 21.54s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:14<07:08, 21.40s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:35<06:45, 21.35s/it][A

tensor(0.3295, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:56<06:22, 21.28s/it][A

tensor(0.4232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:17<06:01, 21.26s/it][A

tensor(0.3606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:39<05:40, 21.25s/it][A

tensor(0.3762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:00<05:18, 21.21s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:21<04:56, 21.21s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:42<04:35, 21.21s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:03<04:14, 21.18s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:24<03:53, 21.19s/it][A

tensor(0.3918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:45<03:31, 21.12s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:07<03:10, 21.14s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:28<02:48, 21.12s/it][A

tensor(0.3606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:49<02:27, 21.10s/it][A

tensor(0.4699, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:10<02:06, 21.13s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:31<01:45, 21.15s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:52<01:24, 21.14s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:14<01:03, 21.17s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:35<00:42, 21.19s/it][A

tensor(0.3918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:56<00:21, 21.16s/it][A

tensor(0.3762, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:03<00:00, 21.00s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4135, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4357115195458194

	train acc: 0.8779367469879519

	training prec: 0.7724719973644578

	training rec: 0.8779367469879519

	training f1: 0.8213836112779561

	Current Learning rate:  4.1428571428571437e-05



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:43,  2.58s/it][A
  7%|▋         | 3/42 [00:07<01:38,  2.52s/it][A
 10%|▉         | 4/42 [00:10<01:36,  2.54s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.52s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.54s/it][A
 17%|█▋        | 7/42 [00:17<01:29,  2.56s/it][A
 19%|█▉        | 8/42 [00:20<01:27,  2.56s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.53s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.55s/it][A
 26%|██▌       | 11/42 [00:28<01:19,  2.56s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.54s/it][A
 31%|███       | 13/42 [00:33<01:13,  2.55s/it][A
 33%|███▎      | 14/42 [00:35<01:11,  2.56s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.53s/it][A
 38%|███▊      | 16/42 [00:40<01:06,  2.55s/it][A
 40%|████      | 17/42 [00:43<01:04,  2.56s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.53s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.55s/it][A
 48%|████▊     | 20/42 [00:50<00:56,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4257159644649142

	Validation acc: 0.8876488095238095

	Validation prec: 0.7922944568452381

	Validation rec: 0.8876488095238095

	Validation f1: 0.8361383554195113
loss: 


  1%|          | 1/83 [00:21<29:04, 21.27s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:41, 21.25s/it][A

tensor(0.3918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:14, 21.18s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:52, 21.17s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:46<27:33, 21.20s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:07<27:07, 21.14s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:28<26:49, 21.17s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:49<26:26, 21.16s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<25:57, 21.05s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:31<25:33, 21.00s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:51<25:07, 20.94s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:12<24:45, 20.93s/it][A

tensor(0.5322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:33<24:23, 20.91s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:54<23:58, 20.85s/it][A

tensor(0.3451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:15<23:38, 20.87s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:36<23:18, 20.87s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<22:54, 20.82s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:33, 20.82s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:13, 20.83s/it][A

tensor(0.3606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<21:50, 20.80s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:43, 21.02s/it][A

tensor(0.5010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:42<21:27, 21.11s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:03<21:08, 21.14s/it][A

tensor(0.3606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:24<20:43, 21.07s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:45<20:24, 21.11s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:06<20:00, 21.06s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:27<19:36, 21.01s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:48<19:12, 20.95s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:09<18:50, 20.94s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:29<18:28, 20.91s/it][A

tensor(0.4387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:50<18:06, 20.90s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:11<17:45, 20.89s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:32<17:23, 20.87s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:53<17:03, 20.90s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:44, 20.92s/it][A

tensor(0.4230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:35<16:22, 20.91s/it][A

tensor(0.4699, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:56<16:03, 20.94s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:17<15:43, 20.97s/it][A

tensor(0.3606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:38<15:23, 21.00s/it][A

tensor(0.5010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<15:04, 21.04s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<14:41, 20.99s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:42<14:32, 21.28s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:04<14:18, 21.47s/it][A

tensor(0.4231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:25<13:59, 21.53s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:47<13:41, 21.61s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:09<13:20, 21.63s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:30<12:57, 21.60s/it][A

tensor(0.3918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:52<12:36, 21.62s/it][A

tensor(0.3762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:14<12:19, 21.74s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:36<12:00, 21.83s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:58<11:39, 21.85s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:20<11:16, 21.84s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:42<10:55, 21.85s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:04<10:33, 21.84s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:25<10:10, 21.81s/it][A

tensor(0.3762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:47<09:48, 21.81s/it][A

tensor(0.3606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:09<09:26, 21.78s/it][A

tensor(0.4230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:31<09:04, 21.80s/it][A

tensor(0.4230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:52<08:42, 21.77s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:14<08:20, 21.77s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:36<07:59, 21.79s/it][A

tensor(0.4854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:58<07:38, 21.82s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:20<07:16, 21.80s/it][A

tensor(0.3918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:42<06:55, 21.85s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:03<06:32, 21.83s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:25<06:10, 21.77s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:47<05:47, 21.74s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:08<05:26, 21.75s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:30<05:03, 21.70s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:52<04:42, 21.72s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:13<04:20, 21.67s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:35<03:58, 21.70s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:57<03:37, 21.72s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:18<03:15, 21.70s/it][A

tensor(0.5010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:40<02:52, 21.60s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:02<02:31, 21.66s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:23<02:10, 21.69s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:45<01:48, 21.74s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:07<01:27, 21.76s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:29<01:05, 21.75s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:51<00:43, 21.77s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:12<00:21, 21.64s/it][A

tensor(0.4542, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:19<00:00, 21.19s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5134, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43645642895296394

	train acc: 0.8771084337349397

	training prec: 0.7710768072289157

	training rec: 0.8771084337349397

	training f1: 0.8202181596529077

	Current Learning rate:  4e-05



  2%|▏         | 1/42 [00:02<01:49,  2.67s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.67s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.61s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.62s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:36<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.65s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.42970534236658187

	Validation acc: 0.8836309523809524

	Validation prec: 0.7851413690476191

	Validation rec: 0.8836309523809524

	Validation f1: 0.8303636451004366
loss: 


  1%|          | 1/83 [00:21<29:42, 21.73s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<28:59, 21.47s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:29, 21.37s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:20, 21.53s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:47<28:06, 21.62s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:09<27:45, 21.62s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:27, 21.68s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:52<27:06, 21.69s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:14<26:46, 21.71s/it][A

tensor(0.4230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:36<26:26, 21.73s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:58<26:02, 21.71s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:19<25:38, 21.67s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:41<25:11, 21.60s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:02<24:51, 21.61s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:24<24:33, 21.66s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:46<24:06, 21.59s/it][A

tensor(0.3918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:06<23:28, 21.35s/it][A

tensor(0.4074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:27<22:57, 21.20s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:48<22:30, 21.10s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:09<22:02, 21.00s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:30<21:38, 20.95s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:50<21:14, 20.89s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:11<20:53, 20.89s/it][A

tensor(0.5010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:32<20:31, 20.88s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:53<20:09, 20.85s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:14<19:48, 20.85s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:35<19:27, 20.85s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:55<19:04, 20.81s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:16<18:44, 20.82s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:37<18:22, 20.80s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:58<18:01, 20.80s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:19<17:41, 20.81s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:39<17:20, 20.81s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:00<17:00, 20.83s/it][A

tensor(0.3449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:21<16:40, 20.83s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:42<16:17, 20.81s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:03<15:56, 20.80s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:24<15:37, 20.83s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:44<15:15, 20.81s/it][A

tensor(0.3762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:05<14:55, 20.83s/it][A

tensor(0.3918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:26<14:35, 20.84s/it][A

tensor(0.5945, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:47<14:13, 20.81s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:08<13:52, 20.82s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:28<13:31, 20.81s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:49<13:11, 20.82s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:10<12:50, 20.82s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:31<12:28, 20.79s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:52<12:08, 20.81s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:12<11:47, 20.81s/it][A

tensor(0.3605, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:33<11:25, 20.78s/it][A

tensor(0.5165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:54<11:05, 20.79s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:15<10:44, 20.80s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:35<10:23, 20.77s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:56<10:03, 20.80s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:17<09:41, 20.77s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:38<09:21, 20.80s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:59<09:01, 20.82s/it][A

tensor(0.4698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:20<08:40, 20.80s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:40<08:19, 20.81s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:01<07:58, 20.82s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:22<07:37, 20.81s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:43<07:17, 20.82s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:04<06:56, 20.83s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:24<06:35, 20.81s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:45<06:14, 20.83s/it][A

tensor(0.3605, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:06<05:53, 20.80s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:27<05:33, 20.82s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:48<05:12, 20.81s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:09<04:51, 20.80s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:29<04:30, 20.82s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:50<04:10, 20.84s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:11<03:48, 20.79s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:33<03:30, 21.05s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:54<03:10, 21.19s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:16<02:50, 21.33s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:37<02:30, 21.45s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:59<02:07, 21.32s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:20<01:46, 21.26s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:41<01:24, 21.19s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:02<01:03, 21.12s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:23<00:42, 21.13s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:44<00:21, 21.13s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:50<00:00, 20.85s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4635, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43597984637122555

	train acc: 0.8775225903614458

	training prec: 0.7719678322665662

	training rec: 0.8775225903614458

	training f1: 0.8208677050167859

	Current Learning rate:  3.857142857142858e-05



  2%|▏         | 1/42 [00:02<01:45,  2.57s/it][A
  5%|▍         | 2/42 [00:05<01:42,  2.56s/it][A
  7%|▋         | 3/42 [00:07<01:37,  2.50s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.54s/it][A
 14%|█▍        | 6/42 [00:15<01:30,  2.51s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.52s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.54s/it][A
 21%|██▏       | 9/42 [00:22<01:22,  2.50s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.52s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.53s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.54s/it][A
 31%|███       | 13/42 [00:32<01:12,  2.51s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.52s/it][A
 36%|███▌      | 15/42 [00:37<01:08,  2.52s/it][A
 38%|███▊      | 16/42 [00:40<01:04,  2.50s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.52s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.53s/it][A
 45%|████▌     | 19/42 [00:47<00:57,  2.50s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256721025421506

	Validation acc: 0.8876488095238095

	Validation prec: 0.7904808407738095

	Validation rec: 0.8876488095238095

	Validation f1: 0.8355751576484155
loss: 


  1%|          | 1/83 [00:21<29:36, 21.66s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:20, 21.74s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:05<29:00, 21.76s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:35, 21.72s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:16, 21.75s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:10<27:54, 21.75s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:32<27:30, 21.72s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:53<27:06, 21.68s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:15<26:47, 21.72s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:37<26:24, 21.71s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:58<26:02, 21.70s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:20<25:40, 21.70s/it][A

tensor(0.5165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:42<25:21, 21.73s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:04<25:01, 21.75s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:25<24:37, 21.73s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:47<24:15, 21.73s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:09<23:54, 21.74s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:31<23:32, 21.72s/it][A

tensor(0.3917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:52<23:12, 21.75s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:14<22:48, 21.72s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:36<22:23, 21.68s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:57<22:03, 21.70s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:19<21:41, 21.68s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:41<21:21, 21.72s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:03<21:00, 21.73s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:24<20:36, 21.69s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:46<20:15, 21.71s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:08<19:54, 21.72s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:29<19:30, 21.67s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:51<19:09, 21.69s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:13<18:48, 21.71s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:34<18:25, 21.69s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:56<18:04, 21.69s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:18<17:40, 21.65s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:39<17:19, 21.66s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [13:01<17:00, 21.71s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:23<16:37, 21.68s/it][A

tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:44<16:15, 21.69s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:06<15:54, 21.70s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:28<15:32, 21.67s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:49<15:10, 21.68s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:11<14:47, 21.65s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:33<14:26, 21.66s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:54<14:05, 21.69s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:16<13:43, 21.66s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:38<13:23, 21.71s/it][A

tensor(0.3761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [17:00<13:02, 21.74s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:21<12:40, 21.72s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:43<12:15, 21.63s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [18:05<11:55, 21.68s/it][A

tensor(0.3604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:26<11:33, 21.66s/it][A

tensor(0.5165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:48<11:12, 21.70s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:10<10:51, 21.72s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:32<10:30, 21.76s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:53<10:09, 21.77s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:15<09:44, 21.66s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:36<09:19, 21.53s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:57<08:55, 21.41s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:18<08:31, 21.30s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:39<08:09, 21.27s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [22:01<07:46, 21.21s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:22<07:24, 21.17s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:43<07:03, 21.18s/it][A

tensor(0.4073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:04<06:40, 21.09s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:26<06:23, 21.32s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:47<06:04, 21.43s/it][A

tensor(0.4229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:09<05:43, 21.49s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:31<05:23, 21.56s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:52<05:02, 21.57s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:14<04:40, 21.61s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:36<04:19, 21.62s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:57<03:57, 21.61s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:19<03:36, 21.64s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:40<03:14, 21.65s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [27:02<02:52, 21.60s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:24<02:31, 21.61s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:45<02:09, 21.62s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:06<01:46, 21.38s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:27<01:24, 21.22s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:48<01:03, 21.12s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:09<00:41, 20.99s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:29<00:20, 20.95s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:36<00:00, 21.40s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4135, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43552304283682125

	train acc: 0.8779367469879519

	training prec: 0.772624952936747

	training rec: 0.8779367469879519

	training f1: 0.8214387817014653

	Current Learning rate:  3.7142857142857143e-05



  2%|▏         | 1/42 [00:02<01:45,  2.57s/it][A
  5%|▍         | 2/42 [00:05<01:42,  2.57s/it][A
  7%|▋         | 3/42 [00:07<01:40,  2.56s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.54s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.55s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.52s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.53s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.54s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.55s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.52s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.53s/it][A
 31%|███       | 13/42 [00:33<01:13,  2.54s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.53s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.54s/it][A
 38%|███▊      | 16/42 [00:40<01:06,  2.55s/it][A
 40%|████      | 17/42 [00:43<01:02,  2.52s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.53s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.54s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.42967666401749566

	Validation acc: 0.8836309523809524

	Validation prec: 0.7839787946428571

	Validation rec: 0.8836309523809524

	Validation f1: 0.8300013006685939
loss: 


  1%|          | 1/83 [00:21<28:56, 21.18s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:29, 21.11s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:13, 21.17s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:53, 21.18s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:29, 21.15s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:07<27:11, 21.18s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:28<26:46, 21.14s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:49<26:28, 21.18s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<26:08, 21.19s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:31<25:44, 21.15s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:52<25:23, 21.16s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:13<25:02, 21.17s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:35<24:39, 21.13s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:56<24:25, 21.23s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:18<24:16, 21.42s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:40<24:02, 21.54s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:01<23:47, 21.62s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:23<23:27, 21.66s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:45<23:09, 21.71s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:07<22:50, 21.76s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:28<22:19, 21.61s/it][A

tensor(0.5165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:50<21:55, 21.57s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:12<21:38, 21.65s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:33<21:17, 21.65s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:55<20:54, 21.64s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:16<20:31, 21.60s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:38<20:13, 21.68s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:00<19:51, 21.66s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:21<19:28, 21.65s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:43<19:08, 21.67s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:05<18:42, 21.59s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:26<18:12, 21.43s/it][A

tensor(0.4385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:47<17:47, 21.34s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:08<17:23, 21.29s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:29<17:00, 21.27s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:50<16:37, 21.23s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:11<16:12, 21.14s/it][A

tensor(0.3604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:32<15:49, 21.10s/it][A

tensor(0.3604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:53<15:26, 21.06s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:14<15:03, 21.01s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:35<14:42, 21.01s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:56<14:21, 21.01s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:17<13:59, 20.99s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:38<13:38, 21.00s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:59<13:17, 21.00s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:20<12:55, 20.96s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:41<12:36, 21.00s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:02<12:13, 20.96s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:23<11:53, 20.99s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:44<11:33, 21.02s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:05<11:11, 21.00s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:26<10:50, 20.98s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:47<10:30, 21.00s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:08<10:08, 21.00s/it][A

tensor(0.5165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:29<09:48, 21.01s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:50<09:27, 21.01s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:11<09:05, 20.96s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:32<08:44, 21.00s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:53<08:23, 20.98s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:14<08:03, 21.01s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:35<07:42, 21.02s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:56<07:20, 20.98s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:18<07:04, 21.24s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:39<06:44, 21.30s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:01<06:25, 21.43s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:23<06:05, 21.52s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:44<05:42, 21.40s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:05<05:19, 21.28s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:26<04:57, 21.23s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:47<04:35, 21.17s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:08<04:14, 21.20s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:29<03:53, 21.20s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:50<03:31, 21.16s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:12<03:10, 21.15s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:33<02:49, 21.16s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:54<02:27, 21.14s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:15<02:06, 21.13s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:36<01:45, 21.09s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:57<01:24, 21.10s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:18<01:03, 21.12s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:39<00:42, 21.11s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:01<00:21, 21.15s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:07<00:00, 21.06s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.3136, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43465815287038506

	train acc: 0.8787650602409639

	training prec: 0.7738316547439759

	training rec: 0.8787650602409639

	training f1: 0.8225437075365662

	Current Learning rate:  3.571428571428572e-05



  2%|▏         | 1/42 [00:02<01:41,  2.47s/it][A
  5%|▍         | 2/42 [00:05<01:41,  2.54s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.56s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:34,  2.54s/it][A
 14%|█▍        | 6/42 [00:15<01:32,  2.56s/it][A
 17%|█▋        | 7/42 [00:17<01:29,  2.57s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.54s/it][A
 21%|██▏       | 9/42 [00:22<01:24,  2.55s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.56s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.53s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.55s/it][A
 31%|███       | 13/42 [00:33<01:14,  2.56s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.53s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.55s/it][A
 38%|███▊      | 16/42 [00:40<01:06,  2.57s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.54s/it][A
 43%|████▎     | 18/42 [00:45<01:01,  2.55s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.56s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.43368506999242873

	Validation acc: 0.8796130952380953

	Validation prec: 0.7788234747023809

	Validation rec: 0.8796130952380953

	Validation f1: 0.8249138554436313
loss: 


  1%|          | 1/83 [00:21<28:54, 21.15s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:21, 21.00s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:56, 20.95s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:30, 20.89s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:04, 20.83s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:47, 20.88s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:48, 21.17s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:38, 21.31s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<26:24, 21.42s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:32<26:07, 21.48s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:53<25:50, 21.54s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:15<25:32, 21.58s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:37<25:10, 21.58s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:58<24:51, 21.62s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:20<24:31, 21.64s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:41<24:08, 21.62s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:03<23:48, 21.64s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:25<23:27, 21.65s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:46<23:03, 21.62s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:08<22:41, 21.62s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:30<22:19, 21.60s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:51<21:58, 21.62s/it][A

tensor(0.4697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:13<21:37, 21.63s/it][A

tensor(0.5321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:34<21:13, 21.59s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:56<20:53, 21.61s/it][A

tensor(0.4541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:18<20:29, 21.58s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:39<20:07, 21.56s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:01<19:45, 21.55s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:22<19:25, 21.58s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:44<19:02, 21.56s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:05<18:41, 21.57s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:27<18:19, 21.55s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:48<17:58, 21.57s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:10<17:38, 21.60s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:32<17:15, 21.56s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:53<16:54, 21.59s/it][A

tensor(0.3604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:15<16:33, 21.60s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:36<16:10, 21.56s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:58<15:49, 21.58s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:19<15:26, 21.56s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:41<15:06, 21.58s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:03<14:44, 21.57s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:24<14:22, 21.55s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:46<14:00, 21.56s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:07<13:39, 21.56s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:29<13:16, 21.52s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:50<12:55, 21.54s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:12<12:34, 21.56s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:33<12:11, 21.53s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:55<11:51, 21.56s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:16<11:28, 21.53s/it][A

tensor(0.5007, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:38<11:08, 21.56s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:00<10:46, 21.56s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:21<10:24, 21.54s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:43<10:03, 21.54s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:04<09:41, 21.54s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:26<09:19, 21.54s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:47<08:58, 21.54s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:09<08:36, 21.51s/it][A

tensor(0.5165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:30<08:15, 21.54s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:52<07:53, 21.52s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:13<07:27, 21.30s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:34<07:04, 21.21s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:55<06:41, 21.15s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:16<06:19, 21.10s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:37<05:58, 21.10s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:58<05:37, 21.07s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:19<05:16, 21.08s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:40<04:55, 21.10s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:01<04:33, 21.06s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:22<04:12, 21.05s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:43<03:51, 21.07s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:04<03:30, 21.04s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:25<03:09, 21.07s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:46<02:48, 21.08s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:07<02:27, 21.05s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:28<02:06, 21.06s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:49<01:45, 21.06s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:10<01:24, 21.04s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:32<01:03, 21.09s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:53<00:42, 21.09s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:14<00:21, 21.13s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:21<00:00, 21.22s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4362888145877654

	train acc: 0.8771084337349397

	training prec: 0.7708885542168675

	training rec: 0.8771084337349397

	training f1: 0.8201630266624673

	Current Learning rate:  3.428571428571429e-05



  2%|▏         | 1/42 [00:02<01:43,  2.54s/it][A
  5%|▍         | 2/42 [00:04<01:38,  2.47s/it][A
  7%|▋         | 3/42 [00:07<01:37,  2.51s/it][A
 10%|▉         | 4/42 [00:10<01:36,  2.54s/it][A
 12%|█▏        | 5/42 [00:12<01:32,  2.50s/it][A
 14%|█▍        | 6/42 [00:15<01:30,  2.52s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.54s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.50s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.52s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.53s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.51s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.52s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.53s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.50s/it][A
 36%|███▌      | 15/42 [00:37<01:08,  2.52s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.53s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.51s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.52s/it][A
 45%|████▌     | 19/42 [00:47<00:58,  2.53s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4296619430893943

	Validation acc: 0.8836309523809524

	Validation prec: 0.7840252976190476

	Validation rec: 0.8836309523809524

	Validation f1: 0.8300142701469142
loss: 


  1%|          | 1/83 [00:21<28:52, 21.13s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:25, 21.05s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:05, 21.07s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:45, 21.09s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:21, 21.05s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:02, 21.08s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:43, 21.09s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:21, 21.08s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:09<26:04, 21.14s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:31<25:45, 21.17s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:52<25:21, 21.14s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:13<25:02, 21.16s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:34<24:39, 21.14s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:55<24:18, 21.14s/it][A

tensor(0.5165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:17<24:05, 21.26s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:38<23:50, 21.36s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:00<23:38, 21.49s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:22<23:19, 21.54s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:43<23:01, 21.58s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:05<22:44, 21.65s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:27<22:22, 21.65s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:49<22:04, 21.71s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:10<21:43, 21.72s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:32<21:17, 21.66s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:54<20:58, 21.69s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:15<20:36, 21.70s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:37<20:15, 21.71s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:59<19:56, 21.76s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:21<19:32, 21.72s/it][A

tensor(0.3447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:43<19:13, 21.77s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:04<18:51, 21.76s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:26<18:28, 21.73s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:48<18:07, 21.74s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:09<17:41, 21.66s/it][A

tensor(0.3760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:31<17:19, 21.66s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:53<17:00, 21.71s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:14<16:39, 21.72s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:35<16:05, 21.46s/it][A

tensor(0.5789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:56<15:38, 21.33s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:17<15:13, 21.25s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:38<14:48, 21.16s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:59<14:27, 21.15s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:20<14:03, 21.09s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:42<13:43, 21.11s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:03<13:22, 21.13s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:24<13:00, 21.09s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:45<12:39, 21.10s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:06<12:18, 21.11s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:27<11:57, 21.09s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:48<11:36, 21.12s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:10<11:20, 21.28s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:32<11:03, 21.40s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:53<10:46, 21.54s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:15<10:25, 21.58s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:37<10:06, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:59<09:45, 21.69s/it][A

tensor(0.3916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:20<09:23, 21.68s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:42<09:02, 21.71s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:04<08:40, 21.67s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:25<08:18, 21.69s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:47<07:58, 21.74s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:09<07:36, 21.74s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:31<07:14, 21.74s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:53<06:53, 21.78s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:14<06:29, 21.64s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:36<06:09, 21.71s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:58<05:47, 21.73s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:19<05:25, 21.71s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:41<05:03, 21.71s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:03<04:42, 21.75s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:24<04:20, 21.71s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:46<03:58, 21.71s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:08<03:37, 21.71s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:30<03:15, 21.72s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:51<02:53, 21.71s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:13<02:31, 21.63s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:34<02:09, 21.64s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:56<01:48, 21.66s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:18<01:26, 21.64s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:39<01:05, 21.67s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:01<00:43, 21.69s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:23<00:21, 21.65s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:30<00:00, 21.33s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43586242988885165

	train acc: 0.8775225903614458

	training prec: 0.7718854715737952

	training rec: 0.8775225903614458

	training f1: 0.8208414707876907

	Current Learning rate:  3.285714285714286e-05



  2%|▏         | 1/42 [00:02<01:49,  2.67s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.60s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.62s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.60s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.61s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.64s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.62s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.61s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.60s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256399195818674

	Validation acc: 0.8876488095238095

	Validation prec: 0.7922014508928571

	Validation rec: 0.8876488095238095

	Validation f1: 0.8361583980329418
loss: 


  1%|          | 1/83 [00:21<29:42, 21.73s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:18, 21.71s/it][A

tensor(0.5321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:50, 21.63s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:31, 21.67s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:14, 21.72s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:10<27:51, 21.71s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:30, 21.72s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:53<27:06, 21.69s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:15<26:46, 21.71s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:36<26:12, 21.54s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:57<25:42, 21.42s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:18<25:17, 21.38s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:40<24:53, 21.34s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:01<24:27, 21.27s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:22<24:06, 21.27s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:43<23:43, 21.25s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:04<23:17, 21.17s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:25<22:56, 21.17s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:47<22:35, 21.19s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:08<22:12, 21.15s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:29<21:51, 21.15s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:50<21:29, 21.13s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:11<21:08, 21.14s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:32<20:48, 21.16s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:53<20:25, 21.13s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:15<20:06, 21.16s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:36<19:44, 21.15s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:57<19:21, 21.12s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:18<19:02, 21.16s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:39<18:39, 21.12s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:00<18:19, 21.15s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:21<17:58, 21.15s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:42<17:36, 21.12s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:04<17:15, 21.14s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:25<16:54, 21.13s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:46<16:32, 21.11s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:07<16:12, 21.14s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:28<15:50, 21.12s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:49<15:28, 21.09s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:10<15:09, 21.14s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:31<14:46, 21.11s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:53<14:25, 21.12s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:14<14:05, 21.14s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:35<13:42, 21.10s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:56<13:22, 21.12s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:17<13:01, 21.13s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:38<12:39, 21.09s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:59<12:18, 21.11s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:20<11:57, 21.10s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:41<11:37, 21.13s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:03<11:16, 21.14s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:24<10:54, 21.10s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:45<10:33, 21.12s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:06<10:12, 21.12s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:27<09:50, 21.10s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:48<09:29, 21.10s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:09<09:08, 21.09s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:30<08:48, 21.13s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:52<08:27, 21.13s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:13<08:05, 21.11s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:34<07:44, 21.13s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:55<07:23, 21.14s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:16<07:02, 21.12s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:37<06:41, 21.15s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:58<06:20, 21.16s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:19<05:58, 21.11s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:40<05:37, 21.10s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:02<05:16, 21.10s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:23<04:55, 21.09s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:44<04:34, 21.12s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:05<04:13, 21.10s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:26<03:52, 21.11s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:47<03:31, 21.12s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:08<03:09, 21.10s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:29<02:48, 21.11s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:51<02:27, 21.13s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:11<02:06, 21.05s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:32<01:45, 21.05s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:53<01:24, 21.01s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:14<01:02, 20.96s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:35<00:41, 20.95s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:56<00:20, 20.91s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:03<00:00, 21.00s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.3634, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43501622669668083

	train acc: 0.8783509036144579

	training prec: 0.7732893684111446

	training rec: 0.8783509036144579

	training f1: 0.822008532169538

	Current Learning rate:  3.142857142857143e-05



  2%|▏         | 1/42 [00:02<01:44,  2.56s/it][A
  5%|▍         | 2/42 [00:05<01:41,  2.53s/it][A
  7%|▋         | 3/42 [00:07<01:36,  2.48s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.50s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.52s/it][A
 14%|█▍        | 6/42 [00:14<01:29,  2.48s/it][A
 17%|█▋        | 7/42 [00:17<01:27,  2.50s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.51s/it][A
 21%|██▏       | 9/42 [00:22<01:21,  2.48s/it][A
 24%|██▍       | 10/42 [00:24<01:19,  2.49s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.51s/it][A
 29%|██▊       | 12/42 [00:29<01:14,  2.48s/it][A
 31%|███       | 13/42 [00:32<01:12,  2.49s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.51s/it][A
 36%|███▌      | 15/42 [00:37<01:07,  2.51s/it][A
 38%|███▊      | 16/42 [00:39<01:04,  2.48s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.50s/it][A
 43%|████▎     | 18/42 [00:44<00:59,  2.47s/it][A
 45%|████▌     | 19/42 [00:47<00:58,  2.54s/it][A
 48%|████▊     | 20/42 [00:50<00:56,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256360346362704

	Validation acc: 0.8876488095238095

	Validation prec: 0.79052734375

	Validation rec: 0.8876488095238095

	Validation f1: 0.835588252686812
loss: 


  1%|          | 1/83 [00:21<29:47, 21.80s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:21, 21.74s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:49, 21.62s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:33, 21.69s/it][A

tensor(0.4228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:12, 21.69s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:10<27:48, 21.66s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:25, 21.65s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:53<27:02, 21.63s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:14<26:35, 21.56s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:36<26:15, 21.59s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:57<25:54, 21.59s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:19<25:30, 21.56s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:41<25:09, 21.56s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:02<24:45, 21.53s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:24<24:25, 21.55s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:45<24:04, 21.56s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:07<23:41, 21.54s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:28<23:20, 21.55s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:50<22:59, 21.55s/it][A

tensor(0.5165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:11<22:35, 21.52s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:33<22:15, 21.55s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:54<21:50, 21.48s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:15<21:16, 21.27s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:36<20:45, 21.11s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:56<20:16, 20.97s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:17<19:52, 20.93s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:39<19:45, 21.18s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:01<19:32, 21.32s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:22<19:18, 21.46s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:44<18:59, 21.51s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:06<18:41, 21.56s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:27<18:22, 21.63s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:48<17:53, 21.47s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:10<17:37, 21.58s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:32<17:18, 21.63s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:53<16:45, 21.39s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:14<16:20, 21.31s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:36<16:02, 21.39s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:57<15:44, 21.47s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:19<15:27, 21.58s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:41<15:06, 21.58s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:02<14:46, 21.61s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:24<14:25, 21.63s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:46<14:02, 21.61s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:07<13:42, 21.64s/it][A

tensor(0.4072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:29<13:21, 21.66s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:51<12:57, 21.61s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:12<12:36, 21.62s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:34<12:15, 21.63s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:55<11:53, 21.63s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:17<11:32, 21.64s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:39<11:10, 21.64s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:00<10:48, 21.62s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:22<10:27, 21.65s/it][A

tensor(0.3290, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:44<10:06, 21.65s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:05<09:44, 21.66s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:27<09:19, 21.53s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:48<08:54, 21.39s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:09<08:31, 21.30s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:30<08:08, 21.24s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:51<07:45, 21.16s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:12<07:24, 21.16s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:33<07:03, 21.16s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:54<06:41, 21.13s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:15<06:20, 21.15s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:36<05:58, 21.12s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:58<05:38, 21.13s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:19<05:16, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:40<04:55, 21.08s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:01<04:34, 21.08s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:22<04:13, 21.12s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:43<03:52, 21.10s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:04<03:31, 21.10s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:25<03:09, 21.08s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:46<02:48, 21.10s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:07<02:27, 21.12s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:29<02:06, 21.09s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:50<01:45, 21.10s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:11<01:24, 21.12s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:32<01:03, 21.11s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:53<00:42, 21.15s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:14<00:21, 21.15s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:21<00:00, 21.22s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43666010520544396

	train acc: 0.8766942771084337

	training prec: 0.770287203501506

	training rec: 0.8766942771084337

	training f1: 0.819609348885488

	Current Learning rate:  3e-05



  2%|▏         | 1/42 [00:02<01:46,  2.59s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.60s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.55s/it][A
 10%|▉         | 4/42 [00:10<01:37,  2.56s/it][A
 12%|█▏        | 5/42 [00:12<01:35,  2.57s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.54s/it][A
 17%|█▋        | 7/42 [00:17<01:29,  2.55s/it][A
 19%|█▉        | 8/42 [00:20<01:27,  2.56s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.53s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.55s/it][A
 26%|██▌       | 11/42 [00:28<01:19,  2.55s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.56s/it][A
 31%|███       | 13/42 [00:33<01:13,  2.53s/it][A
 33%|███▎      | 14/42 [00:35<01:11,  2.55s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.55s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.52s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.54s/it][A
 43%|████▎     | 18/42 [00:45<01:01,  2.55s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.53s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.43366765550204683

	Validation acc: 0.8796130952380953

	Validation prec: 0.7778004092261904

	Validation rec: 0.8796130952380953

	Validation f1: 0.8246022730463045
loss: 


  1%|          | 1/83 [00:21<28:44, 21.03s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:31, 21.13s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:15, 21.20s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:54, 21.20s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:31, 21.17s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:07<27:11, 21.19s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:28<26:47, 21.15s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:49<26:27, 21.17s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<26:05, 21.16s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:31<25:41, 21.12s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:52<25:22, 21.15s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:13<24:58, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:34<24:39, 21.13s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:56<24:18, 21.14s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:17<23:56, 21.12s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:38<23:35, 21.13s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:59<23:19, 21.20s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:21<23:03, 21.29s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:42<22:50, 21.41s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:04<22:33, 21.48s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:26<22:13, 21.50s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:47<21:55, 21.57s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:09<21:35, 21.59s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:31<21:16, 21.63s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:52<20:56, 21.66s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:14<20:32, 21.63s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:36<20:11, 21.64s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:57<19:52, 21.69s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:19<19:28, 21.63s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:41<19:07, 21.65s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:02<18:45, 21.64s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:24<18:21, 21.61s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:45<18:02, 21.64s/it][A

tensor(0.3447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:07<17:38, 21.60s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:29<17:18, 21.63s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:50<16:56, 21.63s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:12<16:34, 21.63s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:34<16:13, 21.64s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:55<15:52, 21.65s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:17<15:29, 21.62s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:38<15:08, 21.62s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:00<14:45, 21.60s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:22<14:25, 21.63s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:43<14:03, 21.63s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:05<13:40, 21.58s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:26<13:19, 21.62s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:48<12:58, 21.62s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:10<12:34, 21.57s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:30<12:06, 21.36s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:51<11:39, 21.21s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:12<11:14, 21.09s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:33<10:52, 21.05s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:54<10:29, 20.98s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:15<10:08, 20.99s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:36<09:46, 20.96s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:57<09:25, 20.93s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:18<09:04, 20.95s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:39<08:43, 20.95s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:59<08:21, 20.90s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:20<08:00, 20.91s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:41<07:39, 20.87s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:02<07:18, 20.90s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:23<06:58, 20.92s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:44<06:36, 20.89s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:05<06:16, 20.89s/it][A

tensor(0.3603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:26<05:55, 20.89s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:46<05:33, 20.86s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:07<05:13, 20.87s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:28<04:51, 20.85s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:49<04:32, 20.96s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:10<04:12, 21.02s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:31<03:51, 21.02s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:53<03:30, 21.07s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:14<03:10, 21.11s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:35<02:48, 21.10s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:56<02:27, 21.13s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:17<02:06, 21.14s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:38<01:45, 21.11s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:59<01:24, 21.12s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:21<01:03, 21.14s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:42<00:42, 21.10s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:03<00:21, 21.12s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:09<00:00, 21.08s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4362376669803298

	train acc: 0.8771084337349397

	training prec: 0.7710179781626506

	training rec: 0.8771084337349397

	training f1: 0.8202017620520823

	Current Learning rate:  2.857142857142857e-05



  2%|▏         | 1/42 [00:02<01:45,  2.56s/it][A
  5%|▍         | 2/42 [00:05<01:42,  2.56s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.56s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.51s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.53s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.54s/it][A
 17%|█▋        | 7/42 [00:17<01:27,  2.51s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.52s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.54s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.54s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.51s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.53s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.53s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.50s/it][A
 36%|███▌      | 15/42 [00:37<01:08,  2.52s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.53s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.50s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.52s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.53s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.429647962252299

	Validation acc: 0.8836309523809524

	Validation prec: 0.7835137648809524

	Validation rec: 0.8836309523809524

	Validation f1: 0.8298533784081362
loss: 


  1%|          | 1/83 [00:21<28:53, 21.14s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:23, 21.03s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:05, 21.07s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:42, 21.05s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:20, 21.03s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:01, 21.06s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:39, 21.04s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:22, 21.09s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:09<26:04, 21.14s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:30<25:41, 21.11s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:52<25:23, 21.16s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:13<25:02, 21.16s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:34<24:38, 21.13s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:55<24:18, 21.14s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:16<23:56, 21.13s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:37<23:34, 21.11s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:58<23:14, 21.13s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:19<22:52, 21.11s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:41<22:32, 21.14s/it][A

tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:02<22:11, 21.14s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:23<21:48, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:44<21:28, 21.13s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:05<21:07, 21.13s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:26<20:45, 21.11s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:47<20:24, 21.11s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:08<20:01, 21.08s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:29<19:41, 21.10s/it][A

tensor(0.3290, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:51<19:21, 21.11s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:12<18:57, 21.07s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:33<18:38, 21.10s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:54<18:17, 21.10s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:15<17:54, 21.07s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:36<17:34, 21.09s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:57<17:12, 21.07s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:18<16:52, 21.10s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:39<16:32, 21.11s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:00<16:09, 21.07s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:21<15:48, 21.08s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:42<15:27, 21.07s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:03<15:05, 21.06s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:24<14:44, 21.06s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:46<14:24, 21.08s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:07<14:01, 21.03s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:28<13:41, 21.06s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:49<13:22, 21.11s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:10<13:00, 21.10s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:31<12:40, 21.12s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:52<12:18, 21.09s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:13<11:57, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:34<11:36, 21.11s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:55<11:15, 21.09s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:17<10:59, 21.26s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:39<10:42, 21.41s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:00<10:22, 21.46s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:22<10:03, 21.56s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:44<09:43, 21.60s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:06<09:21, 21.61s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:27<09:00, 21.63s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:48<08:34, 21.45s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:09<08:11, 21.35s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:30<07:47, 21.27s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:52<07:25, 21.22s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:13<07:03, 21.19s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:34<06:42, 21.18s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:55<06:20, 21.13s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:16<05:59, 21.14s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:37<05:38, 21.14s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:58<05:16, 21.11s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:19<04:55, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:40<04:34, 21.08s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:01<04:13, 21.11s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:23<03:52, 21.12s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:44<03:30, 21.10s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:05<03:10, 21.14s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:26<02:49, 21.18s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:47<02:27, 21.14s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:08<02:07, 21.17s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:30<01:45, 21.13s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:51<01:24, 21.15s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:12<01:03, 21.14s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:33<00:42, 21.07s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:54<00:20, 21.00s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:00<00:00, 20.97s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43539754274379777

	train acc: 0.8779367469879519

	training prec: 0.7721602033132531

	training rec: 0.8779367469879519

	training f1: 0.8212903779275637

	Current Learning rate:  2.714285714285714e-05



  2%|▏         | 1/42 [00:02<01:40,  2.45s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.52s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.54s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.51s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.53s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.54s/it][A
 17%|█▋        | 7/42 [00:17<01:29,  2.55s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.52s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.53s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.54s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.51s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.53s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.54s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.51s/it][A
 36%|███▌      | 15/42 [00:37<01:08,  2.52s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.53s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.51s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.52s/it][A
 45%|████▌     | 19/42 [00:47<00:58,  2.52s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4296461067029408

	Validation acc: 0.8836309523809524

	Validation prec: 0.7834672619047619

	Validation rec: 0.8836309523809524

	Validation f1: 0.8298441945994884
loss: 


  1%|          | 1/83 [00:20<28:30, 20.86s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:06, 20.82s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:44, 20.81s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:25, 20.83s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:03, 20.82s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:45, 20.85s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:27, 20.89s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:06, 20.88s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:46, 20.90s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:24, 20.89s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:06, 20.92s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:45, 20.92s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:22, 20.89s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<24:03, 20.92s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:42, 20.91s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:19, 20.89s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<22:59, 20.91s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:39, 20.92s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:15, 20.87s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:57<21:55, 20.89s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:18<21:33, 20.87s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:39<21:14, 20.89s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:00<20:52, 20.88s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:21<20:29, 20.84s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:42<20:10, 20.88s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:02<19:50, 20.88s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:23<19:28, 20.87s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:44<19:07, 20.87s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:05<18:46, 20.86s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:26<18:25, 20.85s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:47<18:05, 20.87s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:07<17:41, 20.81s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:28<17:21, 20.84s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:49<17:02, 20.87s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:10<16:40, 20.84s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:32<16:34, 21.15s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:54<16:21, 21.33s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:15<16:03, 21.41s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:45, 21.49s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:58<15:24, 21.50s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<15:04, 21.54s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:42<14:44, 21.56s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:03<14:21, 21.54s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:25<14:01, 21.57s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:46<13:40, 21.59s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:08<13:17, 21.55s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:29<12:56, 21.56s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:51<12:34, 21.55s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:12<12:11, 21.52s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:34<11:51, 21.56s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:56<11:28, 21.53s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:17<11:08, 21.56s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:39<10:46, 21.56s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:00<10:23, 21.51s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:22<10:02, 21.53s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:43<09:41, 21.53s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:05<09:18, 21.50s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:26<08:57, 21.49s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:48<08:35, 21.46s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:09<08:13, 21.48s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:31<07:52, 21.48s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:52<07:30, 21.46s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:13<07:09, 21.47s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:35<06:48, 21.49s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:56<06:26, 21.46s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:18<06:05, 21.50s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:39<05:43, 21.49s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:01<05:22, 21.50s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:22<05:00, 21.49s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:44<04:38, 21.46s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:05<04:17, 21.47s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:27<03:56, 21.50s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:48<03:34, 21.45s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:10<03:13, 21.45s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:31<02:51, 21.45s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:52<02:30, 21.43s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:14<02:08, 21.44s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:35<01:47, 21.45s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:57<01:25, 21.44s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:18<01:04, 21.46s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:40<00:42, 21.45s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:01<00:21, 21.45s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:08<00:00, 21.07s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43622134859303396

	train acc: 0.8771084337349397

	training prec: 0.7710297439759036

	training rec: 0.8771084337349397

	training f1: 0.8202061508211569

	Current Learning rate:  2.5714285714285714e-05



  2%|▏         | 1/42 [00:02<01:47,  2.62s/it][A
  5%|▍         | 2/42 [00:05<01:42,  2.57s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.60s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.62s/it][A
 12%|█▏        | 5/42 [00:12<01:35,  2.59s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.61s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.59s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.61s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.60s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:33<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:12,  2.60s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.63s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.60s/it][A
 43%|████▎     | 18/42 [00:46<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256269761494228

	Validation acc: 0.8876488095238095

	Validation prec: 0.7914109002976191

	Validation rec: 0.8876488095238095

	Validation f1: 0.8358514017200214
loss: 


  1%|          | 1/83 [00:21<29:25, 21.54s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:59, 21.48s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:38, 21.48s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:25<28:15, 21.47s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:47<27:50, 21.42s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:08<27:31, 21.45s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:30<27:11, 21.47s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:51<26:45, 21.41s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:12<26:25, 21.42s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:34<26:03, 21.42s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:55<25:39, 21.38s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:17<25:21, 21.43s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:38<25:03, 21.47s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:00<24:46, 21.54s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:22<24:26, 21.57s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:43<24:05, 21.58s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:05<23:48, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:27<23:28, 21.67s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:48<23:04, 21.63s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:10<22:43, 21.64s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:32<22:21, 21.63s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:53<22:01, 21.66s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:15<21:41, 21.68s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:37<21:18, 21.66s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:58<20:57, 21.67s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:20<20:35, 21.67s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:42<20:12, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:03<19:51, 21.66s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:25<19:29, 21.65s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:47<19:10, 21.71s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:09<18:49, 21.72s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:30<18:25, 21.68s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:52<18:05, 21.72s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:14<17:43, 21.71s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:35<17:19, 21.66s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:57<16:58, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:19<16:37, 21.68s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:40<16:13, 21.64s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:02<15:52, 21.65s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:23<15:30, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:45<15:06, 21.59s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:07<14:47, 21.65s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:28<14:26, 21.66s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:50<14:04, 21.66s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:12<13:43, 21.67s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:33<13:21, 21.66s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:55<12:59, 21.65s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:17<12:38, 21.67s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:38<12:16, 21.66s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [18:00<11:54, 21.66s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:22<11:33, 21.67s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:43<11:09, 21.61s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:05<10:48, 21.63s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:26<10:26, 21.62s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:48<10:05, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:10<09:44, 21.66s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:32<09:23, 21.68s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:53<09:02, 21.68s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:15<08:40, 21.67s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:36<08:18, 21.66s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:58<07:56, 21.67s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:20<07:35, 21.67s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:41<07:13, 21.66s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:03<06:51, 21.67s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:25<06:29, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:47<06:08, 21.71s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:08<05:47, 21.70s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:30<05:24, 21.66s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:52<05:03, 21.68s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:13<04:42, 21.69s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:35<04:20, 21.69s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:57<03:58, 21.69s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:18<03:36, 21.67s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:40<03:14, 21.67s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [27:02<02:53, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:23<02:31, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:45<02:09, 21.66s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:07<01:48, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:28<01:26, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:50<01:05, 21.68s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:12<00:43, 21.69s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:33<00:21, 21.63s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:40<00:00, 21.45s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.3633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43497466610138674

	train acc: 0.8783509036144579

	training prec: 0.7729363940135543

	training rec: 0.8783509036144579

	training f1: 0.821898229003296

	Current Learning rate:  2.4285714285714288e-05



  2%|▏         | 1/42 [00:02<01:49,  2.66s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.60s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.61s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.64s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.60s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256256137575422

	Validation acc: 0.8876488095238095

	Validation prec: 0.7917364211309523

	Validation rec: 0.8876488095238095

	Validation f1: 0.8359638804854842
loss: 


  1%|          | 1/83 [00:21<29:34, 21.64s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:13, 21.64s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:49, 21.62s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:30, 21.65s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:10, 21.67s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:09<27:46, 21.64s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:25, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:53<27:03, 21.65s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:14<26:41, 21.64s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:36<26:21, 21.67s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:58<25:58, 21.65s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:19<25:37, 21.66s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:41<25:14, 21.63s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:02<24:51, 21.61s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:24<24:30, 21.62s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:46<24:11, 21.67s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:07<23:49, 21.65s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:29<23:28, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:51<23:07, 21.68s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:12<22:43, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:34<22:22, 21.65s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:56<21:59, 21.64s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:17<21:40, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:39<21:18, 21.68s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:01<20:56, 21.66s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:22<20:35, 21.67s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:44<20:13, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:06<19:49, 21.62s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:27<19:27, 21.63s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:49<19:05, 21.61s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:11<18:45, 21.65s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:32<18:24, 21.65s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:54<18:01, 21.63s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:16<17:40, 21.65s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:37<17:19, 21.66s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:59<16:57, 21.65s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:21<16:37, 21.69s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:42<16:16, 21.70s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:04<15:52, 21.64s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:25<15:30, 21.64s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:47<15:06, 21.58s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:08<14:44, 21.57s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:30<14:21, 21.54s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:51<13:58, 21.50s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:13<13:36, 21.49s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:34<13:16, 21.53s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:56<12:55, 21.55s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:18<12:35, 21.60s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:39<12:14, 21.61s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [18:01<11:53, 21.62s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:23<11:32, 21.65s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:44<11:10, 21.63s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:06<10:49, 21.65s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:28<10:27, 21.65s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:49<10:05, 21.61s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:11<09:43, 21.61s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:32<09:21, 21.59s/it][A

tensor(0.3759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:54<09:00, 21.62s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:16<08:39, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:37<08:17, 21.61s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:59<07:56, 21.65s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:21<07:35, 21.68s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:42<07:13, 21.65s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:04<06:51, 21.66s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:26<06:29, 21.65s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:47<06:07, 21.63s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:09<05:46, 21.65s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:31<05:25, 21.67s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:52<05:02, 21.63s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:14<04:41, 21.66s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:35<04:19, 21.63s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:57<03:58, 21.66s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:19<03:36, 21.66s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:40<03:14, 21.63s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [27:02<02:53, 21.63s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:24<02:31, 21.67s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:45<02:09, 21.64s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:07<01:48, 21.64s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:29<01:26, 21.63s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:50<01:04, 21.60s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:12<00:43, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:33<00:21, 21.61s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:40<00:00, 21.45s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43579691397138387

	train acc: 0.8775225903614458

	training prec: 0.7715148484563252

	training rec: 0.8775225903614458

	training f1: 0.8207218014041117

	Current Learning rate:  2.2857142857142858e-05



  2%|▏         | 1/42 [00:02<01:49,  2.66s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.61s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.61s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.63s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.61s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.64s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.65s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256244074730646

	Validation acc: 0.8876488095238095

	Validation prec: 0.7912713913690477

	Validation rec: 0.8876488095238095

	Validation f1: 0.8358256621909319
loss: 


  1%|          | 1/83 [00:21<29:37, 21.67s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:13, 21.65s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:50, 21.63s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:29, 21.64s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:08, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:09<27:44, 21.61s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:30<27:04, 21.37s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:51<26:33, 21.24s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:12<26:02, 21.12s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:33<25:38, 21.08s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:54<25:17, 21.08s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:15<24:52, 21.02s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:36<24:32, 21.04s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:57<24:11, 21.04s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:18<23:51, 21.05s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:39<23:31, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:00<23:09, 21.05s/it][A

tensor(0.4851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:21<22:51, 21.10s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:43<22:30, 21.10s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:04<22:07, 21.08s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:25<21:49, 21.13s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:46<21:27, 21.10s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:07<21:06, 21.10s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:28<20:44, 21.10s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:49<20:21, 21.06s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:10<20:00, 21.06s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:31<19:39, 21.07s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:52<19:15, 21.01s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:13<18:54, 21.02s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:34<18:33, 21.02s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:55<18:13, 21.04s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:16<17:53, 21.05s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:37<17:32, 21.04s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:59<17:13, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:20<16:52, 21.10s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:41<16:30, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:02<16:09, 21.08s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:23<15:48, 21.07s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:44<15:25, 21.04s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:05<15:05, 21.07s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:26<14:45, 21.07s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:47<14:23, 21.06s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:08<14:03, 21.08s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:29<13:42, 21.08s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:50<13:21, 21.10s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:11<13:00, 21.10s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:32<12:38, 21.07s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:54<12:17, 21.08s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:15<11:56, 21.07s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:36<11:34, 21.05s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:57<11:14, 21.07s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:18<10:53, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:39<10:31, 21.05s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:00<10:11, 21.07s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:21<09:49, 21.06s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:42<09:29, 21.11s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:03<09:08, 21.11s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:24<08:46, 21.07s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:45<08:25, 21.07s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:06<08:04, 21.07s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:27<07:42, 21.03s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:48<07:21, 21.02s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:09<07:00, 21.02s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:30<06:39, 21.02s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:51<06:18, 21.04s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:12<05:56, 21.00s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:33<05:36, 21.01s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:54<05:15, 21.02s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:15<04:54, 21.02s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:36<04:33, 21.03s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:58<04:12, 21.04s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:18<03:51, 21.01s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:40<03:30, 21.05s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:01<03:09, 21.01s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:22<02:48, 21.03s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:43<02:27, 21.07s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:04<02:06, 21.06s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:25<01:45, 21.08s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:46<01:24, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:07<01:03, 21.05s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:28<00:42, 21.07s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:49<00:21, 21.06s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:56<00:00, 20.92s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43537693318114223

	train acc: 0.8779367469879519

	training prec: 0.7725072948042169

	training rec: 0.8779367469879519

	training f1: 0.8213960858350526

	Current Learning rate:  2.1428571428571428e-05



  2%|▏         | 1/42 [00:02<01:45,  2.58s/it][A
  5%|▍         | 2/42 [00:05<01:42,  2.56s/it][A
  7%|▋         | 3/42 [00:07<01:37,  2.50s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.53s/it][A
 14%|█▍        | 6/42 [00:15<01:29,  2.50s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.52s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.53s/it][A
 21%|██▏       | 9/42 [00:22<01:22,  2.50s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.51s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.52s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.53s/it][A
 31%|███       | 13/42 [00:32<01:12,  2.50s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.52s/it][A
 36%|███▌      | 15/42 [00:37<01:08,  2.52s/it][A
 38%|███▊      | 16/42 [00:40<01:04,  2.49s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.51s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.52s/it][A
 45%|████▌     | 19/42 [00:47<00:57,  2.49s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4296410509518215

	Validation acc: 0.8836309523809524

	Validation prec: 0.7847693452380953

	Validation rec: 0.8836309523809524

	Validation f1: 0.8302266458891081
loss: 


  1%|          | 1/83 [00:20<28:32, 20.89s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:21, 21.01s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:07, 21.10s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:43, 21.06s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:27, 21.13s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:06, 21.12s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:39, 21.05s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:20, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:09<25:59, 21.08s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:30<25:36, 21.04s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:51<25:17, 21.07s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:12<24:53, 21.03s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:33<24:32, 21.04s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:54<24:13, 21.06s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:15<23:50, 21.03s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:36<23:31, 21.07s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:58<23:11, 21.09s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:19<22:47, 21.04s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:40<22:28, 21.07s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:01<22:08, 21.08s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:22<21:46, 21.07s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:43<21:25, 21.08s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:04<21:02, 21.05s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:25<20:42, 21.07s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:46<20:21, 21.07s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:07<19:59, 21.04s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:28<19:39, 21.06s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:49<19:19, 21.09s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:10<18:56, 21.05s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:31<18:36, 21.07s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:53<18:16, 21.09s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:14<17:53, 21.06s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:35<17:33, 21.07s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:56<17:10, 21.03s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:17<16:49, 21.04s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:38<16:28, 21.04s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:59<16:07, 21.03s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:20<15:47, 21.05s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:41<15:26, 21.06s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:02<15:04, 21.02s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:23<14:43, 21.04s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:44<14:21, 21.02s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:05<14:00, 21.02s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:26<13:40, 21.05s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:47<13:19, 21.03s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:08<12:58, 21.04s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:29<12:37, 21.04s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:50<12:15, 21.01s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:11<11:55, 21.05s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:32<11:34, 21.06s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:53<11:13, 21.04s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:14<10:52, 21.05s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:35<10:30, 21.03s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:56<10:10, 21.06s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:17<09:49, 21.07s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:38<09:27, 21.02s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:59<09:06, 21.02s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:20<08:45, 21.01s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:41<08:23, 20.99s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:02<08:03, 21.04s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:23<07:42, 21.01s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:45<07:21, 21.03s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:06<07:00, 21.03s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:27<06:39, 21.02s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:48<06:18, 21.03s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:09<05:57, 21.04s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:30<05:36, 21.02s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:51<05:16, 21.07s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:12<04:54, 21.06s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:33<04:33, 21.05s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:54<04:12, 21.06s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:15<03:51, 21.05s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:36<03:30, 21.06s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:57<03:09, 21.06s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:18<02:48, 21.05s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:39<02:27, 21.06s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:00<02:06, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:21<01:44, 20.99s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:42<01:23, 20.96s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:03<01:02, 20.94s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:24<00:41, 20.89s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:45<00:20, 20.88s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:51<00:00, 20.86s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43537557627781326

	train acc: 0.8779367469879519

	training prec: 0.7722425640060241

	training rec: 0.8779367469879519

	training f1: 0.8213210498488631

	Current Learning rate:  2e-05



  2%|▏         | 1/42 [00:02<01:42,  2.51s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.51s/it][A
  7%|▋         | 3/42 [00:07<01:38,  2.52s/it][A
 10%|▉         | 4/42 [00:09<01:34,  2.48s/it][A
 12%|█▏        | 5/42 [00:12<01:32,  2.49s/it][A
 14%|█▍        | 6/42 [00:15<01:30,  2.50s/it][A
 17%|█▋        | 7/42 [00:17<01:26,  2.47s/it][A
 19%|█▉        | 8/42 [00:19<01:24,  2.49s/it][A
 21%|██▏       | 9/42 [00:22<01:22,  2.50s/it][A
 24%|██▍       | 10/42 [00:24<01:20,  2.51s/it][A
 26%|██▌       | 11/42 [00:27<01:16,  2.48s/it][A
 29%|██▊       | 12/42 [00:29<01:14,  2.50s/it][A
 31%|███       | 13/42 [00:32<01:12,  2.50s/it][A
 33%|███▎      | 14/42 [00:34<01:09,  2.47s/it][A
 36%|███▌      | 15/42 [00:37<01:07,  2.49s/it][A
 38%|███▊      | 16/42 [00:39<01:05,  2.51s/it][A
 40%|████      | 17/42 [00:42<01:01,  2.47s/it][A
 43%|████▎     | 18/42 [00:44<00:59,  2.49s/it][A
 45%|████▌     | 19/42 [00:47<00:57,  2.50s/it][A
 48%|████▊     | 20/42 [00:49<00:54,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.42964019661857966

	Validation acc: 0.8836309523809524

	Validation prec: 0.7830487351190476

	Validation rec: 0.8836309523809524

	Validation f1: 0.8297193821469092
loss: 


  1%|          | 1/83 [00:20<28:25, 20.80s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:58, 20.72s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:41, 20.77s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:23, 20.81s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:58, 20.75s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:40, 20.79s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:18, 20.77s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<25:59, 20.79s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:43, 20.85s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:27<25:18, 20.80s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:48<24:58, 20.81s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:09<24:37, 20.81s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:30<24:14, 20.79s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:51<23:55, 20.80s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:11<23:35, 20.81s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:32<23:11, 20.77s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:53<22:56, 20.86s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:14<22:36, 20.87s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:35<22:21, 20.96s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:56<22:03, 21.00s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:17<21:42, 21.01s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:38<21:22, 21.03s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:00<21:02, 21.04s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:21<20:40, 21.03s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:42<20:20, 21.05s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:03<19:59, 21.05s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:24<19:40, 21.09s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:45<19:20, 21.10s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:06<18:58, 21.09s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:27<18:37, 21.09s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:48<18:17, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:09<17:56, 21.10s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:30<17:34, 21.10s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:52<17:12, 21.08s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:13<16:52, 21.09s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:34<16:32, 21.12s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:55<16:10, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:16<15:50, 21.12s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:27, 21.09s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:58<15:06, 21.07s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:19<14:45, 21.08s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:40<14:25, 21.10s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:01<14:03, 21.09s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:23<13:43, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:44<13:21, 21.10s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:05<12:59, 21.06s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:38, 21.07s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:47<12:17, 21.06s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:08<11:57, 21.10s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:29<11:36, 21.11s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:50<11:15, 21.09s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:11<10:55, 21.13s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:32<10:33, 21.13s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:54<10:13, 21.14s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:15<09:51, 21.13s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:36<09:30, 21.12s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:57<09:08, 21.08s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:18<08:47, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:39<08:25, 21.07s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:00<08:05, 21.10s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:21<07:44, 21.11s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:42<07:22, 21.08s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:03<07:02, 21.10s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:25<06:41, 21.13s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:46<06:19, 21.09s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:07<05:58, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:28<05:37, 21.08s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:49<05:16, 21.08s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:10<04:55, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:31<04:33, 21.08s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:52<04:13, 21.10s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:13<03:52, 21.10s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:34<03:30, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:55<03:09, 21.08s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:16<02:48, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:37<02:27, 21.07s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:59<02:06, 21.11s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:20<01:45, 21.09s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:41<01:24, 21.14s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:02<01:03, 21.15s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:23<00:42, 21.15s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:44<00:21, 21.15s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:51<00:00, 20.86s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4353732423609998

	train acc: 0.8779367469879519

	training prec: 0.7727896743222892

	training rec: 0.8779367469879519

	training f1: 0.8214857321302889

	Current Learning rate:  1.8571428571428572e-05



  2%|▏         | 1/42 [00:02<01:40,  2.46s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.52s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.55s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.53s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.54s/it][A
 17%|█▋        | 7/42 [00:17<01:29,  2.55s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.52s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.54s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.54s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.51s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.54s/it][A
 31%|███       | 13/42 [00:32<01:14,  2.56s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.53s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.54s/it][A
 38%|███▊      | 16/42 [00:40<01:06,  2.55s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.53s/it][A
 43%|████▎     | 18/42 [00:45<01:01,  2.54s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.55s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.42562187143734526

	Validation acc: 0.8876488095238095

	Validation prec: 0.7904808407738095

	Validation rec: 0.8876488095238095

	Validation f1: 0.8355702644227789
loss: 


  1%|          | 1/83 [00:21<28:59, 21.21s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:31, 21.13s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:13, 21.16s/it][A

tensor(0.3915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:54, 21.20s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:30, 21.16s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:07<27:10, 21.18s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:41, 21.07s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:10, 20.95s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:09<25:47, 20.91s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:30<25:22, 20.85s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:00, 20.85s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:40, 20.85s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:18, 20.83s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<23:58, 20.84s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:37, 20.85s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:14, 20.81s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<22:53, 20.82s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:33, 20.82s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:09, 20.78s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<21:50, 20.80s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:18<21:27, 20.76s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:39<21:08, 20.80s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:00<20:48, 20.81s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:21<20:27, 20.80s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:42<20:07, 20.82s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:03<19:51, 20.90s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:24<19:32, 20.94s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:45<19:14, 20.99s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:06<18:56, 21.04s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:27<18:34, 21.03s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:48<18:15, 21.07s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:09<17:54, 21.06s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:33, 21.08s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:52<17:13, 21.10s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:13<16:51, 21.06s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:34<16:31, 21.09s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:55<16:12, 21.14s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:16<15:48, 21.09s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:28, 21.10s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:58<15:05, 21.06s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:19<14:44, 21.05s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:40<14:23, 21.05s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:01<14:01, 21.04s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:22<13:40, 21.05s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:43<13:20, 21.06s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:04<12:58, 21.04s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:39, 21.09s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:47<12:18, 21.10s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:08<11:56, 21.08s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:29<11:36, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:50<11:14, 21.08s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:11<10:53, 21.09s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:32<10:33, 21.11s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:53<10:10, 21.05s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:14<09:50, 21.09s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:35<09:29, 21.10s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:56<09:07, 21.07s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:17<08:47, 21.08s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:38<08:24, 21.02s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:00<08:05, 21.11s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:21<07:48, 21.28s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:43<07:28, 21.36s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:05<07:08, 21.45s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:26<06:48, 21.52s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:48<06:27, 21.54s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:09<06:06, 21.58s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:31<05:45, 21.61s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:53<05:24, 21.66s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:15<05:03, 21.67s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:36<04:41, 21.63s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:58<04:19, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:19<03:58, 21.65s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:41<03:36, 21.63s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:03<03:14, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:25<02:54, 21.78s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:47<02:32, 21.83s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:10<02:12, 22.15s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:32<01:50, 22.17s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:54<01:28, 22.03s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:15<01:05, 21.84s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:38<00:44, 22.09s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:01<00:22, 22.33s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:07<00:00, 21.06s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.3633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4349533010678119

	train acc: 0.8783509036144579

	training prec: 0.7729246282003013

	training rec: 0.8783509036144579

	training f1: 0.8218981556602958

	Current Learning rate:  1.7142857142857145e-05



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:43,  2.58s/it][A
  7%|▋         | 3/42 [00:08<01:45,  2.69s/it][A
 10%|▉         | 4/42 [00:10<01:42,  2.68s/it][A
 12%|█▏        | 5/42 [00:13<01:35,  2.59s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.61s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.61s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:26,  2.72s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:20,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.69s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:39<01:12,  2.70s/it][A
 38%|███▊      | 16/42 [00:42<01:10,  2.73s/it][A
 40%|████      | 17/42 [00:45<01:07,  2.69s/it][A
 43%|████▎     | 18/42 [00:48<01:04,  2.69s/it][A
 45%|████▌     | 19/42 [00:51<01:04,  2.82s/it][A
 48%|████▊     | 20/42 [00:54<01:02,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.425621278938793

	Validation acc: 0.8876488095238095

	Validation prec: 0.7914574032738095

	Validation rec: 0.8876488095238095

	Validation f1: 0.8358807494620022
loss: 


  1%|          | 1/83 [00:21<28:51, 21.11s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:17, 20.96s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:57, 20.97s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:37, 20.99s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:12, 20.94s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:51, 20.93s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:37, 21.02s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:30, 21.21s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<26:21, 21.37s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:31<25:51, 21.26s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:52<25:22, 21.15s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:13<25:02, 21.16s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:34<24:42, 21.18s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:55<24:21, 21.18s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:17<24:09, 21.31s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:38<23:48, 21.33s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:00<23:31, 21.39s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:21<23:12, 21.42s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:43<22:49, 21.39s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:04<22:28, 21.41s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:25<22:06, 21.40s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:47<21:45, 21.39s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:08<21:12, 21.21s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:28<20:41, 21.04s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:49<20:17, 20.99s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:10<19:54, 20.95s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:31<19:31, 20.92s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:52<19:12, 20.96s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:13<18:52, 20.97s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:34<18:32, 21.00s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:55<18:13, 21.03s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:16<17:52, 21.02s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:37<17:32, 21.05s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:58<17:13, 21.09s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:19<16:52, 21.09s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:41<16:31, 21.10s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:02<16:11, 21.11s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:23<15:48, 21.08s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:44<15:28, 21.10s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:05<15:07, 21.10s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:26<14:44, 21.07s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:47<14:23, 21.06s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:08<14:02, 21.06s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:29<13:42, 21.10s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:50<13:21, 21.10s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:11<12:59, 21.06s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:32<12:39, 21.09s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:54<12:18, 21.10s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:15<11:56, 21.08s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:36<11:35, 21.09s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:57<11:15, 21.10s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:18<10:52, 21.05s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:39<10:33, 21.12s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:00<10:11, 21.09s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:21<09:50, 21.10s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:42<09:30, 21.12s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:03<09:08, 21.10s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:25<08:48, 21.13s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:46<08:27, 21.13s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:07<08:05, 21.12s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:28<07:44, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:49<07:22, 21.09s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:10<07:01, 21.08s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:31<06:40, 21.09s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:52<06:19, 21.07s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:13<05:58, 21.11s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:35<05:38, 21.15s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:56<05:16, 21.11s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:17<04:55, 21.10s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:38<04:34, 21.15s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:59<04:13, 21.11s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:20<03:52, 21.11s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:41<03:30, 21.09s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:02<03:09, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:23<02:48, 21.12s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:44<02:27, 21.10s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:06<02:06, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:27<01:45, 21.13s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:48<01:24, 21.11s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:09<01:03, 21.11s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:30<00:42, 21.10s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:51<00:21, 21.07s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:58<00:00, 20.94s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43578004442065593

	train acc: 0.8775225903614458

	training prec: 0.7716266236822289

	training rec: 0.8775225903614458

	training f1: 0.8207559938055428

	Current Learning rate:  1.5714285714285715e-05



  2%|▏         | 1/42 [00:02<01:45,  2.57s/it][A
  5%|▍         | 2/42 [00:05<01:42,  2.55s/it][A
  7%|▋         | 3/42 [00:07<01:37,  2.50s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:31,  2.49s/it][A
 14%|█▍        | 6/42 [00:15<01:30,  2.51s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.53s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.53s/it][A
 21%|██▏       | 9/42 [00:22<01:22,  2.50s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.51s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.53s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.50s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.52s/it][A
 33%|███▎      | 14/42 [00:35<01:11,  2.54s/it][A
 36%|███▌      | 15/42 [00:37<01:07,  2.52s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.53s/it][A
 40%|████      | 17/42 [00:42<01:03,  2.54s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.51s/it][A
 45%|████▌     | 19/42 [00:47<00:58,  2.53s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256207573981512

	Validation acc: 0.8876488095238095

	Validation prec: 0.79052734375

	Validation rec: 0.8876488095238095

	Validation f1: 0.8355872184782249
loss: 


  1%|          | 1/83 [00:21<29:40, 21.71s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:26, 21.80s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:05<28:58, 21.73s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:34, 21.70s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:11, 21.69s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:10<27:47, 21.66s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:24, 21.63s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:53<27:02, 21.63s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:14<26:39, 21.62s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:36<26:19, 21.64s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:58<25:57, 21.63s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:19<25:37, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:41<25:16, 21.66s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:03<24:53, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:25<24:34, 21.69s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:46<24:12, 21.68s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:08<23:47, 21.63s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:29<23:26, 21.63s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:51<23:05, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:13<22:42, 21.63s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:34<22:22, 21.65s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:56<22:00, 21.64s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:18<21:39, 21.67s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:39<21:17, 21.65s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:01<20:53, 21.61s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:22<20:32, 21.61s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:44<20:11, 21.64s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:06<19:48, 21.61s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:27<19:28, 21.64s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:49<19:07, 21.64s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:11<18:46, 21.67s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:32<18:15, 21.48s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:53<17:46, 21.33s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:14<17:21, 21.25s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:35<16:57, 21.20s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:56<16:33, 21.14s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:17<16:12, 21.13s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:38<15:49, 21.11s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:59<15:27, 21.09s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:20<15:07, 21.10s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:41<14:44, 21.06s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:02<14:23, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:23<14:02, 21.06s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:44<13:40, 21.04s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:05<13:19, 21.05s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:26<12:59, 21.06s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:47<12:36, 21.01s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:09<12:16, 21.05s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:29<11:54, 21.02s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:51<11:35, 21.07s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:12<11:14, 21.08s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:33<10:53, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:54<10:31, 21.07s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:15<10:11, 21.08s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:36<09:49, 21.06s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:57<09:28, 21.07s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:18<09:07, 21.06s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:39<08:47, 21.08s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:00<08:25, 21.04s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:21<08:03, 21.03s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:42<07:43, 21.08s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:04<07:23, 21.10s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:25<07:01, 21.08s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:46<06:40, 21.09s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:07<06:23, 21.28s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:29<06:03, 21.38s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:51<05:43, 21.49s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:13<05:24, 21.62s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:34<05:02, 21.63s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:56<04:42, 21.69s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:18<04:20, 21.71s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:40<03:59, 21.73s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:02<03:37, 21.76s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:23<03:15, 21.72s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:45<02:54, 21.75s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:07<02:32, 21.75s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:28<02:10, 21.69s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:50<01:48, 21.76s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:12<01:27, 21.75s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:34<01:05, 21.73s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:55<00:43, 21.75s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:17<00:21, 21.73s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:24<00:00, 21.26s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43619310353175705

	train acc: 0.8771084337349397

	training prec: 0.770882671310241

	training rec: 0.8771084337349397

	training f1: 0.8201595384242327

	Current Learning rate:  1.4285714285714285e-05



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.67s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:31,  2.48s/it][A
 14%|█▍        | 6/42 [00:15<01:27,  2.43s/it][A
 17%|█▋        | 7/42 [00:17<01:27,  2.50s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.56s/it][A
 21%|██▏       | 9/42 [00:22<01:24,  2.56s/it][A
 24%|██▍       | 10/42 [00:25<01:23,  2.59s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:30<01:17,  2.60s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.62s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:38<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:46<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:51<00:58,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256203245548975

	Validation acc: 0.8876488095238095

	Validation prec: 0.7907598586309523

	Validation rec: 0.8876488095238095

	Validation f1: 0.8356658867133081
loss: 


  1%|          | 1/83 [00:21<29:48, 21.81s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:23, 21.77s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:05<28:58, 21.73s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:27<28:38, 21.75s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:08, 21.65s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:10<27:45, 21.63s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:24, 21.64s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:53<27:01, 21.62s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:14<26:37, 21.59s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:36<26:18, 21.62s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:58<25:57, 21.63s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:19<25:34, 21.61s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:41<25:15, 21.65s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:03<24:53, 21.64s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:24<24:34, 21.68s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:46<24:13, 21.70s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:08<23:52, 21.70s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:30<23:33, 21.74s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:51<23:10, 21.73s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:13<22:49, 21.74s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:35<22:29, 21.76s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:57<22:05, 21.74s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:18<21:44, 21.74s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:40<21:23, 21.75s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:01<20:46, 21.50s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:22<20:18, 21.37s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:43<19:48, 21.23s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:04<19:20, 21.10s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:25<18:57, 21.07s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:46<18:33, 21.00s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:07<18:12, 21.01s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:28<17:51, 21.02s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:49<17:28, 20.98s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:10<17:08, 21.00s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:31<16:47, 21.00s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:52<16:26, 20.99s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:13<16:07, 21.03s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:34<15:46, 21.04s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:55<15:23, 20.99s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:16<15:03, 21.00s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:37<14:42, 21.02s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:58<14:29, 21.22s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:20<14:15, 21.39s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:42<13:58, 21.49s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:04<13:40, 21.59s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:26<13:21, 21.67s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:47<12:59, 21.67s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:09<12:38, 21.68s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:31<12:18, 21.72s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:53<11:56, 21.71s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:14<11:33, 21.68s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:36<11:12, 21.69s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:58<10:55, 21.85s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:20<10:35, 21.92s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:42<10:10, 21.79s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:03<09:46, 21.72s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:25<09:23, 21.66s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:46<08:58, 21.56s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:08<08:37, 21.57s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:29<08:15, 21.53s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:50<07:52, 21.47s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:12<07:30, 21.45s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:33<07:08, 21.45s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:55<06:46, 21.40s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:16<06:25, 21.41s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:37<06:03, 21.36s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:59<05:41, 21.37s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:20<05:20, 21.36s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:41<04:58, 21.30s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:02<04:34, 21.10s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:22<04:11, 20.99s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:43<03:49, 20.85s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:04<03:27, 20.79s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:25<03:08, 20.98s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:46<02:48, 21.02s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:07<02:26, 20.96s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:28<02:05, 20.89s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:49<01:44, 20.87s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:09<01:23, 20.86s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:30<01:02, 20.92s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:51<00:41, 20.90s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:12<00:20, 20.89s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:19<00:00, 21.19s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4357770455170827

	train acc: 0.8775225903614458

	training prec: 0.7710265672063252

	training rec: 0.8775225903614458

	training f1: 0.820574518229816

	Current Learning rate:  1.2857142857142857e-05



  2%|▏         | 1/42 [00:02<01:43,  2.53s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.52s/it][A
  7%|▋         | 3/42 [00:07<01:36,  2.47s/it][A
 10%|▉         | 4/42 [00:09<01:34,  2.49s/it][A
 12%|█▏        | 5/42 [00:12<01:32,  2.50s/it][A
 14%|█▍        | 6/42 [00:14<01:28,  2.47s/it][A
 17%|█▋        | 7/42 [00:17<01:27,  2.49s/it][A
 19%|█▉        | 8/42 [00:19<01:25,  2.51s/it][A
 21%|██▏       | 9/42 [00:22<01:21,  2.47s/it][A
 24%|██▍       | 10/42 [00:24<01:19,  2.49s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.51s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.55s/it][A
 31%|███       | 13/42 [00:32<01:14,  2.57s/it][A
 33%|███▎      | 14/42 [00:35<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:38<01:13,  2.72s/it][A
 38%|███▊      | 16/42 [00:41<01:10,  2.71s/it][A
 40%|████      | 17/42 [00:43<01:07,  2.71s/it][A
 43%|████▎     | 18/42 [00:46<01:05,  2.71s/it][A
 45%|████▌     | 19/42 [00:49<01:01,  2.68s/it][A
 48%|████▊     | 20/42 [00:51<00:59,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256199243522826

	Validation acc: 0.8876488095238095

	Validation prec: 0.7924339657738095

	Validation rec: 0.8876488095238095

	Validation f1: 0.8361825810007492
loss: 


  1%|          | 1/83 [00:20<28:40, 20.98s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:28, 21.09s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:01, 21.02s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:36, 20.96s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:17, 21.00s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:01, 21.06s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:36, 21.01s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:17, 21.03s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:09<25:57, 21.04s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:30<25:34, 21.02s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:51<25:15, 21.05s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:12<24:56, 21.07s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:33<24:37, 21.11s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:54<24:16, 21.10s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:15<23:53, 21.08s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:36<23:28, 21.03s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:57<23:07, 21.02s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:18<22:46, 21.03s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:39<22:27, 21.05s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:00<22:08, 21.09s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:21<21:44, 21.04s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:43<21:25, 21.08s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:04<21:05, 21.09s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:25<20:44, 21.10s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:46<20:25, 21.12s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:07<20:02, 21.10s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:28<19:42, 21.12s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:49<19:21, 21.12s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:10<18:59, 21.11s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:32<18:39, 21.12s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:53<18:18, 21.13s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:14<17:54, 21.06s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:35<17:34, 21.08s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:56<17:11, 21.05s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:17<16:52, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:38<16:31, 21.10s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:59<16:12, 21.14s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:21<15:52, 21.18s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:42<15:32, 21.18s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:03<15:12, 21.21s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:24<14:51, 21.22s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:45<14:28, 21.17s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:06<14:06, 21.16s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:28<13:45, 21.16s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:49<13:22, 21.13s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:10<13:02, 21.14s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:31<12:41, 21.15s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:52<12:19, 21.13s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:13<11:58, 21.15s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:34<11:37, 21.14s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:55<11:15, 21.11s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:17<10:55, 21.14s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:38<10:33, 21.11s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:59<10:12, 21.13s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:20<09:51, 21.13s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:41<09:29, 21.11s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:02<09:08, 21.11s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:23<08:47, 21.12s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:44<08:26, 21.09s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:06<08:05, 21.12s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:27<07:44, 21.10s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:48<07:23, 21.13s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:09<07:03, 21.17s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:30<06:41, 21.14s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:51<06:20, 21.13s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:12<05:59, 21.16s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:33<05:38, 21.13s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:55<05:16, 21.12s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:16<04:55, 21.09s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:37<04:34, 21.09s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:58<04:13, 21.12s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:19<03:52, 21.10s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:40<03:31, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:01<03:10, 21.13s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:23<02:49, 21.22s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:44<02:28, 21.21s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:05<02:07, 21.20s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:26<01:45, 21.16s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:47<01:24, 21.18s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:09<01:03, 21.19s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:30<00:42, 21.18s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:51<00:21, 21.20s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:58<00:00, 20.94s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.3633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43494553594704133

	train acc: 0.8783509036144579

	training prec: 0.7733423145707832

	training rec: 0.8783509036144579

	training f1: 0.8220269704403685

	Current Learning rate:  1.1428571428571429e-05



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.60s/it][A
 10%|▉         | 4/42 [00:10<01:36,  2.55s/it][A
 12%|█▏        | 5/42 [00:12<01:35,  2.57s/it][A
 14%|█▍        | 6/42 [00:15<01:32,  2.58s/it][A
 17%|█▋        | 7/42 [00:17<01:29,  2.55s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.56s/it][A
 21%|██▏       | 9/42 [00:23<01:24,  2.57s/it][A
 24%|██▍       | 10/42 [00:25<01:22,  2.58s/it][A
 26%|██▌       | 11/42 [00:28<01:18,  2.55s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.56s/it][A
 31%|███       | 13/42 [00:33<01:14,  2.57s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.52s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.52s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.53s/it][A
 40%|████      | 17/42 [00:43<01:02,  2.50s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.51s/it][A
 45%|████▌     | 19/42 [00:48<00:57,  2.52s/it][A
 48%|████▊     | 20/42 [00:50<00:54,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256196057512647

	Validation acc: 0.8876488095238095

	Validation prec: 0.7931315104166666

	Validation rec: 0.8876488095238095

	Validation f1: 0.8364053700521672
loss: 


  1%|          | 1/83 [00:20<28:34, 20.90s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:17, 20.96s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:53, 20.92s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:30, 20.89s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:08, 20.87s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:48, 20.89s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:25, 20.86s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:05, 20.87s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:47, 20.91s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:25, 20.89s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:06, 20.92s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:46, 20.94s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:23, 20.91s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<24:03, 20.92s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:42, 20.92s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:20, 20.90s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<23:02, 20.95s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:39, 20.92s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:18, 20.92s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<21:58, 20.92s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:19<21:34, 20.88s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:39<21:14, 20.89s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:01<21:03, 21.06s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:51, 21.21s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:44<20:37, 21.34s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:06<20:19, 21.39s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:27<20:01, 21.46s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:49<19:43, 21.51s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:10<19:23, 21.55s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:32<19:03, 21.58s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:54<18:42, 21.59s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:15<18:21, 21.59s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:37<17:59, 21.60s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:58<17:36, 21.56s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:20<17:16, 21.60s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:42<16:56, 21.63s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:03<16:35, 21.63s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:25<16:15, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:47<15:54, 21.69s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:09<15:32, 21.68s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:30<15:11, 21.70s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:52<14:51, 21.75s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:14<14:28, 21.72s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:36<14:07, 21.72s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:57<13:45, 21.71s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:19<13:22, 21.69s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:41<13:01, 21.72s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:02<12:39, 21.70s/it][A

tensor(0.3289, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:24<12:19, 21.75s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:46<11:57, 21.74s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:08<11:34, 21.71s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:29<11:13, 21.74s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:51<10:52, 21.74s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:12<10:25, 21.58s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:34<10:06, 21.65s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:56<09:45, 21.67s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:17<09:22, 21.65s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:39<09:01, 21.66s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:01<08:38, 21.61s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:22<08:17, 21.63s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:44<07:56, 21.65s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:05<07:33, 21.58s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:27<07:11, 21.59s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:48<06:47, 21.42s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:09<06:22, 21.26s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:30<06:00, 21.21s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:51<05:38, 21.16s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:12<05:16, 21.12s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:33<04:55, 21.09s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:54<04:33, 21.03s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:15<04:12, 21.03s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:36<03:51, 21.00s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:57<03:29, 20.96s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:18<03:08, 20.96s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:39<02:47, 20.95s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:00<02:26, 20.91s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:21<02:05, 20.94s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:41<01:44, 20.92s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:02<01:23, 20.93s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:23<01:02, 20.93s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:44<00:41, 20.92s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:05<00:20, 20.92s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:12<00:00, 21.11s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.3633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4349456630557416

	train acc: 0.8783509036144579

	training prec: 0.7732070077183736

	training rec: 0.8783509036144579

	training f1: 0.8219843881586506

	Current Learning rate:  1e-05



  2%|▏         | 1/42 [00:02<01:40,  2.45s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.52s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.54s/it][A
 10%|▉         | 4/42 [00:10<01:34,  2.50s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.52s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.54s/it][A
 17%|█▋        | 7/42 [00:17<01:29,  2.55s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.52s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.53s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.55s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.52s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.54s/it][A
 31%|███       | 13/42 [00:32<01:14,  2.55s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.53s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.54s/it][A
 38%|███▊      | 16/42 [00:40<01:06,  2.55s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.53s/it][A
 43%|████▎     | 18/42 [00:45<01:01,  2.54s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.56s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4376723879859561

	Validation acc: 0.8755952380952381

	Validation prec: 0.7744568452380951

	Validation rec: 0.8755952380952381

	Validation f1: 0.8204259963417991
loss: 


  1%|          | 1/83 [00:21<28:43, 21.02s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:13, 20.91s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:55, 20.94s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:32, 20.92s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:09, 20.89s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:49, 20.90s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:28, 20.91s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:05, 20.87s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:47, 20.91s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:21, 20.85s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:02, 20.87s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:43, 20.90s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:22, 20.89s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<24:02, 20.90s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:42, 20.91s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:19, 20.89s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<23:02, 20.94s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:40, 20.92s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:16, 20.89s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:57<21:56, 20.89s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:18<21:34, 20.88s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:39<21:14, 20.89s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:00<20:52, 20.88s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:21<20:27, 20.81s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:42<20:17, 20.98s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:03<20:02, 21.09s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:43, 21.13s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:26, 21.22s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<19:07, 21.25s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:29<18:45, 21.25s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:50<18:25, 21.27s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:11<18:04, 21.26s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:33<17:44, 21.29s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:54<17:23, 21.30s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:15<17:00, 21.27s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:36<16:40, 21.28s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:58<16:19, 21.30s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:19<15:55, 21.24s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:40<15:31, 21.17s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:01<15:07, 21.09s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:22<14:45, 21.08s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:43<14:23, 21.07s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:04<14:02, 21.06s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:25<13:41, 21.07s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:46<13:20, 21.07s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:07<12:59, 21.06s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:28<12:38, 21.07s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:49<12:18, 21.10s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:10<11:55, 21.04s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:32<11:35, 21.09s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:53<11:15, 21.09s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:15<11:03, 21.41s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:36<10:40, 21.34s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:57<10:16, 21.27s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:18<09:55, 21.25s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:40<09:33, 21.26s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:01<09:11, 21.20s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:22<08:49, 21.18s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:44<08:36, 21.53s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:06<08:20, 21.77s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:28<07:56, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:49<07:31, 21.49s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:11<07:16, 21.81s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:32<06:50, 21.58s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:54<06:25, 21.44s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:15<06:03, 21.36s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:36<05:40, 21.27s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:57<05:18, 21.26s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:18<04:57, 21.24s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:40<04:38, 21.44s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:03<04:20, 21.74s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:24<03:56, 21.52s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:45<03:34, 21.47s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:06<03:13, 21.48s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:28<02:53, 21.64s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:50<02:31, 21.65s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:12<02:10, 21.67s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:34<01:48, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:55<01:26, 21.64s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:17<01:04, 21.64s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:38<00:43, 21.60s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:00<00:21, 21.60s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:07<00:00, 21.05s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4353583889553346

	train acc: 0.8779367469879519

	training prec: 0.7723249246987952

	training rec: 0.8779367469879519

	training f1: 0.8213424637573776

	Current Learning rate:  8.571428571428573e-06



  2%|▏         | 1/42 [00:02<01:49,  2.66s/it][A
  5%|▍         | 2/42 [00:05<01:43,  2.59s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.61s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.61s/it][A
 26%|██▌       | 11/42 [00:28<01:19,  2.56s/it][A
 29%|██▊       | 12/42 [00:31<01:16,  2.56s/it][A
 31%|███       | 13/42 [00:33<01:14,  2.57s/it][A
 33%|███▎      | 14/42 [00:36<01:10,  2.53s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.55s/it][A
 38%|███▊      | 16/42 [00:41<01:06,  2.56s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.52s/it][A
 43%|████▎     | 18/42 [00:46<01:00,  2.54s/it][A
 45%|████▌     | 19/42 [00:49<00:58,  2.55s/it][A
 48%|████▊     | 20/42 [00:51<00:56,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4296367629652932

	Validation acc: 0.8836309523809524

	Validation prec: 0.7838857886904762

	Validation rec: 0.8836309523809524

	Validation f1: 0.8299758339968119
loss: 


  1%|          | 1/83 [00:21<28:51, 21.12s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:28, 21.09s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:08, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:44, 21.07s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:19, 21.02s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<26:57, 21.00s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:34, 20.97s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:10, 20.94s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:49, 20.94s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:28, 20.94s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:05, 20.91s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:46, 20.93s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:24, 20.92s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:05, 20.95s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:42, 20.92s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:19, 20.88s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<22:59, 20.91s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:39, 20.92s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:17, 20.90s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<22:09, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:21<22:02, 21.33s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:42<21:32, 21.18s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:04<21:29, 21.50s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:26<21:09, 21.51s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:47<20:52, 21.60s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:09<20:24, 21.48s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:31<20:21, 21.81s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:55<20:30, 22.37s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:17<20:00, 22.22s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:38<19:22, 21.93s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:59<18:49, 21.72s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:22<18:37, 21.90s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:45<18:34, 22.29s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:07<18:06, 22.18s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:28<17:30, 21.89s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:49<17:00, 21.72s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:10<16:32, 21.58s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:33<16:27, 21.94s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:56<16:11, 22.07s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:17<15:41, 21.90s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:39<15:13, 21.74s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:00<14:48, 21.67s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:21<14:23, 21.59s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:43<14:00, 21.55s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:04<13:38, 21.53s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:26<13:14, 21.48s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:47<12:53, 21.49s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:09<12:32, 21.50s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:30<12:08, 21.43s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:51<11:46, 21.42s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:13<11:24, 21.41s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:34<11:05, 21.45s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:56<10:43, 21.46s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:17<10:21, 21.43s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:40<10:07, 21.71s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:03<10:02, 22.33s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:26<09:42, 22.40s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:48<09:18, 22.33s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:11<08:58, 22.42s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:32<08:30, 22.21s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:54<08:06, 22.14s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:16<07:43, 22.06s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:38<07:18, 21.95s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:00<06:56, 21.93s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [23:22<06:35, 21.96s/it][A

loss: 


 80%|███████▉  | 66/83 [23:44<06:13, 21.99s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:06<05:50, 21.93s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:27<05:28, 21.88s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:49<05:06, 21.88s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:11<04:44, 21.86s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:33<04:22, 21.86s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:55<04:00, 21.86s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:16<03:36, 21.68s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:37<03:14, 21.57s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:59<02:52, 21.51s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:21<02:31, 21.65s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:43<02:10, 21.69s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:04<01:48, 21.70s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:26<01:26, 21.67s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:48<01:05, 21.69s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:09<00:43, 21.69s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:31<00:21, 21.68s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:38<00:00, 21.43s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4353568751409829

	train acc: 0.8779367469879519

	training prec: 0.7720072477409639

	training rec: 0.8779367469879519

	training f1: 0.8212482053923291

	Current Learning rate:  7.142857142857143e-06



  2%|▏         | 1/42 [00:02<01:49,  2.67s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.66s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.65s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.65s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.64s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.425618860693205

	Validation acc: 0.8876488095238095

	Validation prec: 0.7922479538690477

	Validation rec: 0.8876488095238095

	Validation f1: 0.8361218342739216
loss: 


  1%|          | 1/83 [00:21<29:08, 21.32s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:49, 21.35s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:21, 21.27s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:25<28:01, 21.28s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:46<27:39, 21.28s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:07<27:14, 21.22s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:28<26:54, 21.24s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:50<26:33, 21.24s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:11<26:07, 21.18s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:32<25:46, 21.19s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:53<25:24, 21.17s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:14<25:04, 21.19s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:35<24:44, 21.21s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:57<24:21, 21.18s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:18<24:05, 21.26s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:39<23:44, 21.26s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:01<23:22, 21.26s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:22<23:04, 21.29s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:43<22:43, 21.31s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:05<22:21, 21.30s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:26<22:00, 21.30s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:47<21:38, 21.28s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:08<21:18, 21.31s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:30<20:59, 21.35s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:51<20:37, 21.34s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:12<20:15, 21.33s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:34<19:55, 21.35s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:55<19:32, 21.32s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:17<19:12, 21.35s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:38<18:50, 21.33s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:59<18:30, 21.36s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:21<18:09, 21.36s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:42<17:47, 21.35s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:03<17:27, 21.39s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:25<17:13, 21.52s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:47<16:51, 21.52s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:08<16:29, 21.50s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:30<16:06, 21.48s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:51<15:41, 21.41s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:12<15:18, 21.37s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:33<14:53, 21.28s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:55<14:32, 21.27s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:16<14:10, 21.26s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:37<13:46, 21.20s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:58<13:25, 21.21s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:19<13:03, 21.18s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:40<12:41, 21.14s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:01<12:19, 21.13s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:22<11:57, 21.09s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:43<11:35, 21.09s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:04<11:14, 21.09s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:25<10:52, 21.06s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:47<10:33, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:08<10:12, 21.11s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:29<09:50, 21.08s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:50<09:29, 21.10s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:11<09:08, 21.08s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:32<08:47, 21.10s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:53<08:26, 21.10s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:14<08:04, 21.08s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:35<07:43, 21.09s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:57<07:23, 21.12s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:18<07:01, 21.10s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:39<06:40, 21.09s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:00<06:19, 21.09s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:21<05:58, 21.06s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:42<05:37, 21.07s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:03<05:15, 21.06s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:24<04:53, 20.96s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:45<04:32, 20.93s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:05<04:10, 20.89s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:26<03:50, 20.91s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:47<03:29, 20.94s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:08<03:08, 20.96s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:29<02:47, 21.00s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:50<02:27, 21.03s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:11<02:06, 21.02s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:33<01:45, 21.05s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:54<01:24, 21.08s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:15<01:03, 21.06s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:36<00:42, 21.07s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:57<00:21, 21.03s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:03<00:00, 21.01s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43618344327053393

	train acc: 0.8771084337349397

	training prec: 0.7703179122740964

	training rec: 0.8771084337349397

	training f1: 0.8199883384347364

	Current Learning rate:  5.7142857142857145e-06



  2%|▏         | 1/42 [00:02<01:44,  2.55s/it][A
  5%|▍         | 2/42 [00:05<01:41,  2.53s/it][A
  7%|▋         | 3/42 [00:07<01:36,  2.48s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.51s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.53s/it][A
 14%|█▍        | 6/42 [00:15<01:29,  2.49s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.52s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.53s/it][A
 21%|██▏       | 9/42 [00:22<01:22,  2.50s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.53s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.54s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.52s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.53s/it][A
 33%|███▎      | 14/42 [00:35<01:11,  2.55s/it][A
 36%|███▌      | 15/42 [00:37<01:08,  2.54s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.52s/it][A
 40%|████      | 17/42 [00:42<01:03,  2.54s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.51s/it][A
 45%|████▌     | 19/42 [00:47<00:58,  2.53s/it][A
 48%|████▊     | 20/42 [00:50<00:56,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4336541224093664

	Validation acc: 0.8796130952380953

	Validation prec: 0.7802185639880952

	Validation rec: 0.8796130952380953

	Validation f1: 0.8253586836561647
loss: 


  1%|          | 1/83 [00:21<28:57, 21.19s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<29:02, 21.51s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:05<29:13, 21.92s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:28<29:21, 22.30s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:51<29:30, 22.70s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:14<29:07, 22.69s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:35<28:08, 22.21s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:56<27:22, 21.90s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:17<26:42, 21.65s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:39<26:10, 21.51s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:00<25:40, 21.40s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:21<25:09, 21.26s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:42<24:45, 21.22s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:03<24:19, 21.15s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:24<23:57, 21.14s/it][A

tensor(0.5789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:46<23:50, 21.35s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:07<23:35, 21.45s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:29<23:19, 21.53s/it][A

tensor(0.5632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:51<23:00, 21.58s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:13<22:48, 21.71s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:35<22:34, 21.85s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:57<22:10, 21.80s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:19<21:48, 21.80s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:40<21:27, 21.82s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:02<20:54, 21.64s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:24<20:45, 21.85s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:45<20:14, 21.68s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:06<19:44, 21.54s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:28<19:21, 21.52s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:49<18:59, 21.49s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:11<18:37, 21.49s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:32<18:16, 21.49s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:54<17:53, 21.47s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:15<17:32, 21.49s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:37<17:12, 21.51s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:58<16:49, 21.48s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:20<16:29, 21.50s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:41<16:07, 21.50s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:03<15:44, 21.48s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:24<15:26, 21.54s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:46<15:06, 21.57s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:07<14:41, 21.50s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:29<14:19, 21.49s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:50<13:56, 21.45s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:12<13:37, 21.51s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:33<13:15, 21.50s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:55<12:52, 21.46s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:16<12:31, 21.47s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:38<12:10, 21.48s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:59<11:46, 21.42s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:20<11:25, 21.41s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:43<11:12, 21.69s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:05<10:59, 21.98s/it][A

tensor(0.4851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:28<10:46, 22.28s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:52<10:33, 22.63s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:14<10:05, 22.43s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:36<09:39, 22.30s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:58<09:20, 22.42s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:21<08:59, 22.46s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:44<08:38, 22.54s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [22:07<08:18, 22.65s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:28<07:50, 22.41s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:50<07:23, 22.19s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:12<06:58, 22.04s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:33<06:34, 21.92s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:55<06:11, 21.85s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:17<05:49, 21.84s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:39<05:28, 21.88s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [25:01<05:05, 21.84s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:23<04:43, 21.84s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:44<04:22, 21.84s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [26:06<03:59, 21.80s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:28<03:38, 21.82s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:50<03:16, 21.81s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [27:12<02:54, 21.84s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:34<02:32, 21.85s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:55<02:10, 21.80s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:17<01:49, 21.81s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:39<01:27, 21.88s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [29:00<01:05, 21.71s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:22<00:43, 21.63s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:43<00:21, 21.57s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:50<00:00, 21.57s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5632, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43659845317702695

	train acc: 0.8766942771084337

	training prec: 0.7706342949924698

	training rec: 0.8766942771084337

	training f1: 0.8197269768771257

	Current Learning rate:  4.285714285714286e-06



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.63s/it][A
  7%|▋         | 3/42 [00:07<01:40,  2.58s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.60s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:32,  2.57s/it][A
 17%|█▋        | 7/42 [00:18<01:30,  2.59s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.60s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.58s/it][A
 24%|██▍       | 10/42 [00:25<01:23,  2.59s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.61s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.59s/it][A
 33%|███▎      | 14/42 [00:36<01:12,  2.60s/it][A
 36%|███▌      | 15/42 [00:38<01:10,  2.61s/it][A
 38%|███▊      | 16/42 [00:41<01:06,  2.58s/it][A
 40%|████      | 17/42 [00:44<01:04,  2.60s/it][A
 43%|████▎     | 18/42 [00:46<01:02,  2.60s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.58s/it][A
 48%|████▊     | 20/42 [00:51<00:57,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256185782807214

	Validation acc: 0.8876488095238095

	Validation prec: 0.7908063616071429

	Validation rec: 0.8876488095238095

	Validation f1: 0.8356786538402154
loss: 


  1%|          | 1/83 [00:21<29:01, 21.24s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:45, 21.30s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:26, 21.33s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:25<28:00, 21.27s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:46<27:42, 21.31s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:07<27:25, 21.37s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:29<26:59, 21.31s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:50<26:37, 21.30s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:11<26:16, 21.30s/it][A

tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:32<25:52, 21.26s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:54<25:31, 21.27s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:15<25:08, 21.25s/it][A

tensor(0.3289, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:36<24:49, 21.28s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:58<24:27, 21.27s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:19<24:04, 21.25s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:40<23:44, 21.26s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:01<23:23, 21.27s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:23<23:01, 21.25s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:44<22:41, 21.27s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:05<22:19, 21.26s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:26<21:56, 21.24s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:48<21:37, 21.27s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:09<21:15, 21.25s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:30<20:54, 21.26s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:51<20:32, 21.25s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:12<20:08, 21.20s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:34<19:48, 21.22s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:55<19:26, 21.22s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:16<19:04, 21.19s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:37<18:45, 21.24s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:59<18:25, 21.25s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:20<18:01, 21.21s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:41<17:40, 21.22s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:02<17:19, 21.22s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:25<17:17, 21.61s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:46<16:55, 21.61s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:08<16:28, 21.49s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:29<16:04, 21.44s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:50<15:42, 21.42s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:11<15:18, 21.36s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:33<14:56, 21.35s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:55<14:42, 21.53s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:17<14:27, 21.68s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:39<14:08, 21.75s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:00<13:46, 21.76s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:22<13:26, 21.81s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:44<12:58, 21.64s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:05<12:32, 21.50s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:26<12:09, 21.46s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:48<11:48, 21.47s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:09<11:25, 21.44s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:31<11:05, 21.46s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:52<10:43, 21.45s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:13<10:22, 21.48s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:35<10:01, 21.50s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:57<09:40, 21.49s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:18<09:18, 21.48s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:39<08:56, 21.48s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:01<08:34, 21.44s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:22<08:13, 21.44s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:44<07:51, 21.42s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:05<07:30, 21.46s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:28<07:16, 21.84s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:50<06:56, 21.94s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:12<06:34, 21.90s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:34<06:11, 21.88s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:56<05:50, 21.88s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:18<05:30, 22.00s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:39<05:06, 21.86s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:01<04:42, 21.74s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:23<04:22, 21.88s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:45<04:00, 21.88s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:06<03:37, 21.74s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:28<03:14, 21.61s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:49<02:52, 21.50s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:10<02:30, 21.47s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:32<02:09, 21.66s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:55<01:49, 21.81s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:17<01:27, 21.92s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:39<01:05, 21.99s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:01<00:43, 21.96s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:23<00:21, 21.91s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:29<00:00, 21.32s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43576937985707476

	train acc: 0.8775225903614458

	training prec: 0.7717501647213855

	training rec: 0.8775225903614458

	training f1: 0.8207946001124418

	Current Learning rate:  2.8571428571428573e-06



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.68s/it][A
  7%|▋         | 3/42 [00:08<01:44,  2.68s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.63s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.67s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.63s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.66s/it][A
 33%|███▎      | 14/42 [00:37<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.65s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.62s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.64s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.65s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256184853258587

	Validation acc: 0.8876488095238095

	Validation prec: 0.7909458705357143

	Validation rec: 0.8876488095238095

	Validation f1: 0.8357167037257734
loss: 


  1%|          | 1/83 [00:21<29:51, 21.84s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:19, 21.72s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:47, 21.59s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:33, 21.69s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:04, 21.59s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:10<27:49, 21.69s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:27, 21.68s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:53<27:11, 21.75s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:15<26:52, 21.79s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:37<26:30, 21.79s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:59<26:11, 21.82s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:21<25:50, 21.84s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:42<25:27, 21.83s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:04<25:07, 21.85s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:26<24:45, 21.84s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:48<24:20, 21.80s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:10<23:59, 21.81s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:31<23:36, 21.79s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:53<23:15, 21.81s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:15<22:54, 21.82s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:37<22:32, 21.82s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:59<22:11, 21.83s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:21<21:50, 21.84s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:42<21:25, 21.79s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:04<21:03, 21.78s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:26<20:40, 21.76s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:48<20:19, 21.77s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:09<19:58, 21.78s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:31<19:34, 21.75s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:53<19:13, 21.76s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:15<18:53, 21.79s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:36<18:31, 21.80s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:58<18:11, 21.83s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:20<17:44, 21.73s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:42<17:22, 21.71s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [13:03<17:01, 21.74s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:25<16:38, 21.70s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:47<16:17, 21.71s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:09<15:57, 21.75s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:30<15:33, 21.72s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:52<15:11, 21.70s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:14<14:49, 21.70s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:35<14:26, 21.67s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:56<14:00, 21.54s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:18<13:35, 21.46s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:39<13:10, 21.36s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [17:00<12:48, 21.35s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:21<12:25, 21.29s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:43<12:04, 21.31s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [18:04<11:42, 21.30s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:25<11:19, 21.24s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:46<10:59, 21.26s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:08<10:37, 21.26s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:29<10:15, 21.22s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:50<09:54, 21.24s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:11<09:35, 21.31s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:33<09:14, 21.32s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:54<08:54, 21.38s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:16<08:33, 21.39s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:37<08:12, 21.43s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:59<07:51, 21.45s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:20<07:30, 21.44s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:42<07:08, 21.45s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:03<06:47, 21.45s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:24<06:25, 21.42s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:46<06:04, 21.45s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:07<05:43, 21.45s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:29<05:20, 21.35s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:50<04:58, 21.32s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:11<04:36, 21.25s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:32<04:14, 21.24s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:53<03:53, 21.25s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:14<03:32, 21.22s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:36<03:10, 21.22s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:57<02:49, 21.22s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:18<02:28, 21.16s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:39<02:07, 21.19s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:00<01:45, 21.18s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:22<01:24, 21.22s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:43<01:03, 21.23s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:04<00:42, 21.20s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:25<00:21, 21.23s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:32<00:00, 21.36s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43577013496892997

	train acc: 0.8775225903614458

	training prec: 0.7718972373870482

	training rec: 0.8775225903614458

	training f1: 0.8208462269736887

	Current Learning rate:  1.4285714285714286e-06



  2%|▏         | 1/42 [00:02<01:42,  2.50s/it][A
  5%|▍         | 2/42 [00:05<01:42,  2.56s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.59s/it][A
 10%|▉         | 4/42 [00:10<01:37,  2.55s/it][A
 12%|█▏        | 5/42 [00:12<01:35,  2.57s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.59s/it][A
 17%|█▋        | 7/42 [00:18<01:30,  2.60s/it][A
 19%|█▉        | 8/42 [00:20<01:27,  2.56s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.58s/it][A
 24%|██▍       | 10/42 [00:25<01:22,  2.59s/it][A
 26%|██▌       | 11/42 [00:28<01:19,  2.56s/it][A
 29%|██▊       | 12/42 [00:30<01:17,  2.58s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.59s/it][A
 33%|███▎      | 14/42 [00:35<01:11,  2.56s/it][A
 36%|███▌      | 15/42 [00:38<01:09,  2.58s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.59s/it][A
 40%|████      | 17/42 [00:43<01:04,  2.56s/it][A
 43%|████▎     | 18/42 [00:46<01:01,  2.58s/it][A
 45%|████▌     | 19/42 [00:48<00:59,  2.59s/it][A
 48%|████▊     | 20/42 [00:51<00:56,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4296361456314723

	Validation acc: 0.8836309523809524

	Validation prec: 0.7831417410714285

	Validation rec: 0.8836309523809524

	Validation f1: 0.8297432871960618
loss: 


  1%|          | 1/83 [00:21<29:21, 21.48s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:50, 21.37s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:27, 21.34s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:25<28:06, 21.35s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:46<27:42, 21.31s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:08<27:21, 21.31s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:29<26:59, 21.30s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:50<26:32, 21.23s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:11<26:11, 21.23s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:32<25:48, 21.22s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:54<25:28, 21.23s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:15<25:08, 21.25s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:36<24:46, 21.24s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:57<24:26, 21.25s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:19<24:05, 21.26s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:40<23:42, 21.23s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:01<23:19, 21.20s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:22<23:02, 21.26s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:44<22:57, 21.52s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:06<22:41, 21.61s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:28<22:21, 21.63s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:50<22:03, 21.69s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:12<21:42, 21.71s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:33<21:18, 21.67s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:55<20:54, 21.63s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:16<20:30, 21.58s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:38<20:06, 21.54s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:59<19:43, 21.52s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:21<19:23, 21.54s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:42<19:00, 21.53s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:04<18:41, 21.56s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:25<18:18, 21.54s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:47<17:58, 21.58s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:08<17:35, 21.55s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:30<17:13, 21.53s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:52<16:57, 21.65s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:14<16:37, 21.68s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:35<16:15, 21.68s/it][A

tensor(0.4226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:57<15:55, 21.71s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:19<15:33, 21.72s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:41<15:15, 21.79s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:02<14:47, 21.64s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:23<14:20, 21.52s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:44<13:56, 21.45s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:06<13:32, 21.39s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:27<13:09, 21.33s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:48<12:47, 21.33s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:09<12:25, 21.30s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:31<12:03, 21.27s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:52<11:42, 21.28s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:13<11:19, 21.25s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:35<10:59, 21.29s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:56<10:38, 21.28s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:17<10:16, 21.25s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:38<09:55, 21.26s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:00<09:34, 21.26s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:21<09:12, 21.24s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:42<08:51, 21.26s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:03<08:29, 21.24s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:25<08:09, 21.26s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:46<07:47, 21.27s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:07<07:26, 21.24s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:28<07:05, 21.26s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:50<06:44, 21.29s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:11<06:22, 21.25s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:32<06:01, 21.27s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:53<05:39, 21.25s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:15<05:18, 21.25s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:36<04:57, 21.24s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:57<04:36, 21.24s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:18<04:15, 21.26s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:40<03:53, 21.26s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:01<03:32, 21.24s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:22<03:11, 21.26s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:43<02:50, 21.26s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:05<02:28, 21.24s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:26<02:07, 21.24s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:47<01:46, 21.24s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:08<01:24, 21.20s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:29<01:03, 21.22s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:51<00:42, 21.18s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:12<00:21, 21.23s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:19<00:00, 21.19s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5632, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.43659774546163627

	train acc: 0.8766942771084337

	training prec: 0.7705695830195783

	training rec: 0.8766942771084337

	training f1: 0.8196970803670105

	Current Learning rate:  0.0



  2%|▏         | 1/42 [00:02<01:46,  2.59s/it][A
  5%|▍         | 2/42 [00:05<01:41,  2.53s/it][A
  7%|▋         | 3/42 [00:07<01:40,  2.57s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.59s/it][A
 12%|█▏        | 5/42 [00:12<01:34,  2.55s/it][A
 14%|█▍        | 6/42 [00:15<01:32,  2.57s/it][A
 17%|█▋        | 7/42 [00:18<01:30,  2.59s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.56s/it][A
 21%|██▏       | 9/42 [00:23<01:24,  2.58s/it][A
 24%|██▍       | 10/42 [00:25<01:22,  2.59s/it][A
 26%|██▌       | 11/42 [00:28<01:19,  2.56s/it][A
 29%|██▊       | 12/42 [00:30<01:17,  2.57s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.59s/it][A
 33%|███▎      | 14/42 [00:35<01:11,  2.56s/it][A
 36%|███▌      | 15/42 [00:38<01:09,  2.58s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.59s/it][A
 40%|████      | 17/42 [00:43<01:04,  2.56s/it][A
 43%|████▎     | 18/42 [00:46<01:01,  2.58s/it][A
 45%|████▌     | 19/42 [00:48<00:59,  2.59s/it][A
 48%|████▊     | 20/42 [00:51<00:57,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4296361200866245

	Validation acc: 0.8836309523809524

	Validation prec: 0.7839322916666667

	Validation rec: 0.8836309523809524

	Validation f1: 0.8299783008928466
loss: 


  1%|          | 1/83 [00:21<29:33, 21.62s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:01, 21.49s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:39, 21.49s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:25<28:17, 21.48s/it][A

tensor(0.3289, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:47<27:49, 21.40s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:08<27:28, 21.41s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:30<27:06, 21.41s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:51<26:39, 21.32s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:12<26:16, 21.31s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:33<25:56, 21.33s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:55<25:34, 21.31s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:16<25:13, 21.31s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:37<24:49, 21.28s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:59<24:31, 21.33s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:20<24:11, 21.34s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:41<23:50, 21.35s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:03<23:30, 21.38s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:24<23:11, 21.40s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:46<22:55, 21.49s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:08<22:39, 21.58s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:29<22:19, 21.61s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:51<22:01, 21.66s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:13<21:41, 21.69s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:35<21:18, 21.67s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:56<20:59, 21.72s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:18<20:38, 21.73s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:40<20:16, 21.73s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:02<19:56, 21.75s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:23<19:33, 21.74s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:45<19:12, 21.74s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:07<18:51, 21.75s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:29<18:28, 21.73s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:50<18:08, 21.77s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:12<17:48, 21.80s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:34<17:24, 21.76s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:56<17:03, 21.79s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:17<16:40, 21.74s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:39<16:16, 21.70s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:01<15:54, 21.70s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:22<15:33, 21.70s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:44<15:10, 21.68s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:06<14:56, 21.85s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:28<14:32, 21.82s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:50<14:10, 21.81s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:12<13:49, 21.82s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:33<13:25, 21.78s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:55<13:04, 21.80s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:17<12:42, 21.78s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:39<12:18, 21.74s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [18:00<11:57, 21.76s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:22<11:35, 21.74s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:44<11:13, 21.73s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:06<10:52, 21.75s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:28<10:37, 21.97s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:50<10:16, 22.00s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:12<09:52, 21.94s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:34<09:28, 21.86s/it][A

tensor(0.4851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:55<09:05, 21.82s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:17<08:43, 21.79s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:39<08:19, 21.73s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [22:00<07:57, 21.72s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:22<07:35, 21.71s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:44<07:13, 21.67s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:05<06:49, 21.56s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:26<06:26, 21.46s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:47<06:03, 21.40s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:09<05:42, 21.38s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:30<05:20, 21.34s/it][A

tensor(0.4851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:52<05:00, 21.49s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:13<04:38, 21.42s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:34<04:15, 21.33s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:56<03:54, 21.31s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:17<03:32, 21.28s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:38<03:11, 21.28s/it][A

tensor(0.3446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:59<02:50, 21.28s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:20<02:28, 21.24s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:42<02:07, 21.24s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:03<01:46, 21.28s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:24<01:24, 21.24s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:45<01:03, 21.25s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:07<00:42, 21.25s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:28<00:21, 21.21s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:35<00:00, 21.39s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4633, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4357681820191533

	train acc: 0.8775225903614458

	training prec: 0.771673686935241

	training rec: 0.8775225903614458

	training f1: 0.8207719639737354

	Current Learning rate:  0.0



  2%|▏         | 1/42 [00:02<01:47,  2.62s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.60s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.56s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.59s/it][A
 12%|█▏        | 5/42 [00:12<01:35,  2.57s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.60s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:20<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.60s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.60s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.59s/it][A
 29%|██▊       | 12/42 [00:31<01:16,  2.55s/it][A
 31%|███       | 13/42 [00:33<01:13,  2.55s/it][A
 33%|███▎      | 14/42 [00:36<01:11,  2.55s/it][A
 36%|███▌      | 15/42 [00:38<01:07,  2.51s/it][A
 38%|███▊      | 16/42 [00:41<01:05,  2.52s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.53s/it][A
 43%|████▎     | 18/42 [00:46<01:00,  2.50s/it][A
 45%|████▌     | 19/42 [00:48<00:57,  2.51s/it][A
 48%|████▊     | 20/42 [00:51<00:55,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.42561840088594527

	Validation acc: 0.8876488095238095

	Validation prec: 0.7906668526785714

	Validation rec: 0.8876488095238095

	Validation f1: 0.8356417526106776
loss: 


  1%|          | 1/83 [00:21<28:59, 21.21s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:33, 21.15s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:08, 21.10s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:50, 21.15s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:34, 21.98s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:11<28:51, 22.49s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:35<28:55, 22.84s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:00<29:28, 23.58s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:25<29:36, 24.00s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:48<29:02, 23.87s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:10<27:52, 23.23s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:32<26:55, 22.76s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:53<26:09, 22.42s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:15<25:27, 22.14s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:36<24:54, 21.97s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:58<24:24, 21.86s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:19<23:53, 21.72s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:41<23:28, 21.66s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [07:03<23:14, 21.79s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:25<23:02, 21.95s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:47<22:38, 21.91s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [08:11<22:42, 22.34s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:34<22:42, 22.71s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:57<22:25, 22.80s/it][A

loss: 


 30%|███       | 25/83 [09:19<21:40, 22.43s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:42<21:37, 22.76s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [10:05<21:15, 22.78s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:28<20:50, 22.73s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:51<20:33, 22.84s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [11:14<20:09, 22.82s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:35<19:24, 22.40s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:57<19:01, 22.38s/it][A

loss: tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [12:20<18:46, 22.53s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:43<18:23, 22.52s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [13:05<17:54, 22.40s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [13:26<17:21, 22.15s/it][A

tensor(0.3758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:48<16:53, 22.03s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [14:10<16:27, 21.94s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:31<15:59, 21.81s/it][A

tensor(0.3602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:53<15:37, 21.80s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [15:15<15:12, 21.73s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:36<14:49, 21.70s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:58<14:27, 21.69s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [16:20<14:04, 21.65s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:41<13:43, 21.66s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [17:03<13:21, 21.66s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [17:24<12:59, 21.65s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:46<12:37, 21.65s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [18:08<12:14, 21.62s/it][A

tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [18:29<11:54, 21.64s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:51<11:32, 21.64s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [19:13<11:10, 21.62s/it][A

tensor(0.4851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:34<10:48, 21.62s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:56<10:26, 21.61s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [20:17<10:03, 21.54s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:39<09:40, 21.51s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [21:00<09:18, 21.47s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [21:21<08:56, 21.48s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:43<08:36, 21.50s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [22:04<08:14, 21.49s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [22:26<07:53, 21.51s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:48<07:32, 21.54s/it][A

tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [23:09<07:10, 21.52s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:31<06:49, 21.53s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:52<06:28, 21.56s/it][A

tensor(0.4071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [24:14<06:07, 21.61s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:36<05:46, 21.68s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:58<05:26, 21.78s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [25:20<05:04, 21.75s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:41<04:42, 21.73s/it][A

tensor(0.4227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [26:03<04:20, 21.69s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [26:25<03:58, 21.69s/it][A

tensor(0.3914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:46<03:37, 21.73s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [27:08<03:15, 21.70s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [27:30<02:53, 21.71s/it][A

tensor(0.4695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:51<02:31, 21.70s/it][A

tensor(0.4539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [28:13<02:10, 21.67s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:35<01:48, 21.70s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:56<01:26, 21.70s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [29:18<01:04, 21.66s/it][A

tensor(0.4383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:40<00:43, 21.66s/it][A

tensor(0.4852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [30:01<00:21, 21.64s/it][A

tensor(0.4070, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [30:08<00:00, 21.79s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4133, device='cuda:0', grad_fn=<NllLossBackward>)

	Training MCC Accuracy: 0.0

	Train loss: 0.4353540528969592

	train acc: 0.8779367469879519

	training prec: 0.7722955101656627

	training rec: 0.8779367469879519

	training f1: 0.8213389265373052

	Current Learning rate:  0.0



  2%|▏         | 1/42 [00:02<01:49,  2.67s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.61s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.62s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:14,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.65s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:47<01:01,  2.57s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.58s/it][A
 48%|████▊     | 20/42 [00:52<00:56,  2


	Validation MCC Accuracy: 0.0

	Validation loss: 0.4256184094008945

	Validation acc: 0.8876488095238095

	Validation prec: 0.7907598586309523

	Validation rec: 0.8876488095238095

	Validation f1: 0.8356668417231058





In [33]:
# train_loss.numpy()
# train_loss


In [34]:
# # Saving models
# torch.save(model.state_dict(), "finetuned.pth")

# #load models
# model = Neural

In [50]:
## saving the model 
torch.save(model.state_dict(), "finetuned-35-epochs.pth")

In [51]:
# loading the locally saved model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


model = CausalityBERT()
model.load_state_dict(torch.load("finetuned-35-epochs.pth"))
## Move the model to the GPU 
model.to(device)
model.eval() # gettign in the eval mode 



You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.0.attention.self.key.weight', 'roberta.encoder.layer.11.attention.self.key.bias', 'roberta.encoder.layer.6.attention.output.LayerNorm.weight', 'roberta.encoder.layer.7.intermediate.dense.bias', 'roberta.encoder.layer.4.attention.self.key.bias', 'roberta.encoder.layer.7.attention.self.value.bias', 'roberta.encoder.layer.8.attention.self.query.bias', 'roberta.encoder.layer.8.attention.self.value.weight', 'roberta.encoder.layer.9.intermediate.dense.weight', 'lm_head.bias', 'roberta.encoder.layer.8.intermediate.dense.weight', 'roberta.encoder.layer.11.attention.self.value.weight', 'roberta.encoder.layer.9.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.7.at

CausalityBERT(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(64001, 768, padding_idx=1)
      (position_embeddings): Embedding(130, 768)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True

## evaluation on the test dataset 



In [38]:
## evaluation on the test dataset 

# test_accuracy, test_mcc_accuracy, nb_test_steps = 0, 0, 0 
test_mcc_accuracy, nb_test_steps = 0, 0,

# test_accuracy = []
test_loss = []
test_acc = []
test_prec = []
test_rec = []
test_f1 = []

for batch in tqdm(test_loader):
    batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
    b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
    with torch.no_grad():
        model.eval()
        logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
    
    
    loss = loss_fn(logits, b_labels)
    test_loss.append(loss.item())

    # move logits and labels to CPU
    logits = logits.detach().to('cpu').numpy()
    label_ids = b_labels.to('cpu').numpy()

    pred_flat = np.argmax(logits, axis=1).flatten()
    labels_flat = label_ids.flatten()

#     eval_accuracy += accuracy_score(labels_flat, pred_flat)
    test_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  


    metrics = compute_metrics(pred_flat, labels_flat)
    test_acc.append(metrics["accuracy"])
    test_prec.append(metrics["precision"])
    test_rec.append(metrics["recall"])
    test_f1.append(metrics["f1"])
    nb_test_steps += 1
    

# print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
print(F'\n\ttest MCC Accuracy: {test_mcc_accuracy / nb_test_steps}') # eval M
print(F'\n\ttest loss: {np.mean(test_loss)}')
print(F'\n\ttest acc: {np.mean(test_acc)}')
print(F'\n\ttest prec: {np.mean(test_prec)}')
print(F'\n\ttest rec: {np.mean(test_rec)}')
print(F'\n\ttest f1: {np.mean(test_f1)}')

  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
100%|██████████| 52/52 [02:47<00:00,  3.22s/it]


	test MCC Accuracy: 0.0

	test loss: 0.44631937260811144

	test acc: 0.8669471153846153

	test prec: 0.7549646935096154

	test rec: 0.8669471153846153

	test f1: 0.8062038738692032





### printig some variabels to look into 

In [40]:
train_loss


[0.46953466534614563,
 0.4695369601249695,
 0.42266905307769775,
 0.3914310932159424,
 0.40704697370529175,
 0.42267104983329773,
 0.4070471227169037,
 0.360171914100647,
 0.4539085030555725,
 0.4382795989513397,
 0.4070567488670349,
 0.46954286098480225,
 0.4226679801940918,
 0.4226563274860382,
 0.45392122864723206,
 0.43829724192619324,
 0.4070553779602051,
 0.48515892028808594,
 0.45390549302101135,
 0.3757953643798828,
 0.4070453643798828,
 0.40703269839286804,
 0.4226720929145813,
 0.5007822513580322,
 0.407050758600235,
 0.45391330122947693,
 0.4851546883583069,
 0.40706220269203186,
 0.3758067190647125,
 0.42265719175338745,
 0.42266646027565,
 0.39142996072769165,
 0.4695398509502411,
 0.42267733812332153,
 0.5007687211036682,
 0.3758033514022827,
 0.43828701972961426,
 0.4226643741130829,
 0.3601856529712677,
 0.4382989704608917,
 0.48515984416007996,
 0.40704599022865295,
 0.4382978081703186,
 0.4539179801940918,
 0.46953898668289185,
 0.4070377051830292,
 0.453912615776062,

In [41]:
train_acc 

[0.84375,
 0.84375,
 0.890625,
 0.921875,
 0.90625,
 0.890625,
 0.90625,
 0.953125,
 0.859375,
 0.875,
 0.90625,
 0.84375,
 0.890625,
 0.890625,
 0.859375,
 0.875,
 0.90625,
 0.828125,
 0.859375,
 0.9375,
 0.90625,
 0.90625,
 0.890625,
 0.8125,
 0.90625,
 0.859375,
 0.828125,
 0.90625,
 0.9375,
 0.890625,
 0.890625,
 0.921875,
 0.84375,
 0.890625,
 0.8125,
 0.9375,
 0.875,
 0.890625,
 0.953125,
 0.875,
 0.828125,
 0.90625,
 0.875,
 0.859375,
 0.84375,
 0.90625,
 0.859375,
 0.921875,
 0.75,
 0.921875,
 0.859375,
 0.828125,
 0.921875,
 0.875,
 0.90625,
 0.84375,
 0.84375,
 0.90625,
 0.84375,
 0.796875,
 0.84375,
 0.765625,
 0.890625,
 0.921875,
 0.90625,
 0.859375,
 0.875,
 0.875,
 0.921875,
 0.890625,
 0.828125,
 0.921875,
 0.875,
 0.90625,
 0.84375,
 0.859375,
 0.90625,
 0.875,
 0.875,
 0.875,
 0.828125,
 0.90625,
 0.9]

In [42]:
train_prec

[0.7119140625,
 0.7119140625,
 0.793212890625,
 0.849853515625,
 0.8212890625,
 0.793212890625,
 0.8212890625,
 0.908447265625,
 0.738525390625,
 0.765625,
 0.8212890625,
 0.7119140625,
 0.793212890625,
 0.793212890625,
 0.738525390625,
 0.765625,
 0.8212890625,
 0.685791015625,
 0.738525390625,
 0.87890625,
 0.8212890625,
 0.8212890625,
 0.793212890625,
 0.66015625,
 0.8212890625,
 0.738525390625,
 0.685791015625,
 0.8212890625,
 0.87890625,
 0.793212890625,
 0.793212890625,
 0.849853515625,
 0.7119140625,
 0.793212890625,
 0.66015625,
 0.87890625,
 0.765625,
 0.793212890625,
 0.908447265625,
 0.765625,
 0.685791015625,
 0.8212890625,
 0.765625,
 0.738525390625,
 0.7119140625,
 0.8212890625,
 0.738525390625,
 0.849853515625,
 0.5625,
 0.849853515625,
 0.738525390625,
 0.685791015625,
 0.849853515625,
 0.765625,
 0.8212890625,
 0.7119140625,
 0.7119140625,
 0.8212890625,
 0.7119140625,
 0.635009765625,
 0.7119140625,
 0.586181640625,
 0.793212890625,
 0.849853515625,
 0.8212890625,
 0.

In [43]:

train_rec

[0.84375,
 0.84375,
 0.890625,
 0.921875,
 0.90625,
 0.890625,
 0.90625,
 0.953125,
 0.859375,
 0.875,
 0.90625,
 0.84375,
 0.890625,
 0.890625,
 0.859375,
 0.875,
 0.90625,
 0.828125,
 0.859375,
 0.9375,
 0.90625,
 0.90625,
 0.890625,
 0.8125,
 0.90625,
 0.859375,
 0.828125,
 0.90625,
 0.9375,
 0.890625,
 0.890625,
 0.921875,
 0.84375,
 0.890625,
 0.8125,
 0.9375,
 0.875,
 0.890625,
 0.953125,
 0.875,
 0.828125,
 0.90625,
 0.875,
 0.859375,
 0.84375,
 0.90625,
 0.859375,
 0.921875,
 0.75,
 0.921875,
 0.859375,
 0.828125,
 0.921875,
 0.875,
 0.90625,
 0.84375,
 0.84375,
 0.90625,
 0.84375,
 0.796875,
 0.84375,
 0.765625,
 0.890625,
 0.921875,
 0.90625,
 0.859375,
 0.875,
 0.875,
 0.921875,
 0.890625,
 0.828125,
 0.921875,
 0.875,
 0.90625,
 0.84375,
 0.859375,
 0.90625,
 0.875,
 0.875,
 0.875,
 0.828125,
 0.90625,
 0.9]

In [44]:

train_f1

[0.7722457627118644,
 0.7722457627118644,
 0.8391012396694215,
 0.884400406504065,
 0.8616803278688524,
 0.8391012396694215,
 0.8616803278688524,
 0.93025,
 0.7943802521008404,
 0.8166666666666667,
 0.8616803278688524,
 0.7722457627118644,
 0.8391012396694215,
 0.8391012396694215,
 0.7943802521008404,
 0.8166666666666667,
 0.8616803278688524,
 0.750267094017094,
 0.7943802521008404,
 0.907258064516129,
 0.8616803278688524,
 0.8616803278688524,
 0.8391012396694215,
 0.728448275862069,
 0.8616803278688524,
 0.7943802521008404,
 0.750267094017094,
 0.8616803278688524,
 0.907258064516129,
 0.8391012396694215,
 0.8391012396694215,
 0.884400406504065,
 0.7722457627118644,
 0.8391012396694215,
 0.728448275862069,
 0.907258064516129,
 0.8166666666666667,
 0.8391012396694215,
 0.93025,
 0.8166666666666667,
 0.750267094017094,
 0.8616803278688524,
 0.8166666666666667,
 0.7943802521008404,
 0.7722457627118644,
 0.8616803278688524,
 0.7943802521008404,
 0.884400406504065,
 0.6428571428571428,
 0.8

In [45]:
val_accuracy

[]

In [46]:
val_loss

[0.46951550245285034,
 0.4382668137550354,
 0.4382668733596802,
 0.4070178270339966,
 0.46951544284820557,
 0.43826672434806824,
 0.4382666051387787,
 0.4695158004760742,
 0.5320134162902832,
 0.4070179760456085,
 0.4070177674293518,
 0.438266783952713,
 0.469515323638916,
 0.4070178270339966,
 0.3757689297199249,
 0.5632620453834534,
 0.3132708668708801,
 0.4070178270339966,
 0.438267320394516,
 0.40701788663864136,
 0.3757689595222473,
 0.46951550245285034,
 0.4070180654525757,
 0.4382669925689697,
 0.4382668435573578,
 0.40701743960380554,
 0.37576913833618164,
 0.34452033042907715,
 0.34452003240585327,
 0.4695158302783966,
 0.3757689297199249,
 0.3757689595222473,
 0.4070180058479309,
 0.5007646083831787,
 0.43826672434806824,
 0.37576913833618164,
 0.5007641911506653,
 0.5007648468017578,
 0.40701791644096375,
 0.4695156514644623,
 0.4070180058479309,
 0.31327152252197266]

In [47]:
val_acc

[0.84375,
 0.875,
 0.875,
 0.90625,
 0.84375,
 0.875,
 0.875,
 0.84375,
 0.78125,
 0.90625,
 0.90625,
 0.875,
 0.84375,
 0.90625,
 0.9375,
 0.75,
 1.0,
 0.90625,
 0.875,
 0.90625,
 0.9375,
 0.84375,
 0.90625,
 0.875,
 0.875,
 0.90625,
 0.9375,
 0.96875,
 0.96875,
 0.84375,
 0.9375,
 0.9375,
 0.90625,
 0.8125,
 0.875,
 0.9375,
 0.8125,
 0.8125,
 0.90625,
 0.84375,
 0.90625,
 1.0]

In [48]:
val_prec

[0.7119140625,
 0.765625,
 0.765625,
 0.8212890625,
 0.7119140625,
 0.765625,
 0.765625,
 0.7119140625,
 0.6103515625,
 0.8212890625,
 0.8212890625,
 0.765625,
 0.7119140625,
 0.8212890625,
 0.87890625,
 0.5625,
 1.0,
 0.8212890625,
 0.765625,
 0.8212890625,
 0.87890625,
 0.7119140625,
 0.8212890625,
 0.765625,
 0.765625,
 0.8212890625,
 0.87890625,
 0.9384765625,
 0.9384765625,
 0.7119140625,
 0.87890625,
 0.87890625,
 0.8212890625,
 0.66015625,
 0.765625,
 0.87890625,
 0.66015625,
 0.66015625,
 0.8212890625,
 0.7119140625,
 0.8212890625,
 1.0]

In [None]:
val_rec

In [None]:
val_f1

In [None]:
test_accuracy

In [None]:
test_loss

In [None]:
test_acc

In [None]:
test_prec

In [None]:
test_rec

In [None]:
test_f1