## A model build using TweetBERT to classify tweet as causal or non-causal 

In [1]:
import pandas as pd
import numpy as np
import spacy 
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification, AutoTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import random
import os
import torch.nn.functional as F
import torch
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
import transformers
from tqdm import tqdm, trange
#from google.colab import drive, files
import io

In [2]:
#uploaded = files.upload()
#data = pd.read_excel(io.BytesIO(uploaded['Causality + hypoglycemia.xlsx']), sheet_name=">5000_samples_")


# data = pd.read_excel("/home/adrian/Downloads/Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")
data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")

  data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")


In [3]:
print("Total count:", data.shape[0])
data = data[data["Causal association"].notnull()]
print("Labeled count:", data.shape[0])

data.head()

Total count: 5456
Labeled count: 5000


Unnamed: 0,id,text,full_text,Intent,Cause,Effect,Causal association,Charline association0=no;1=yes,Remarks
0,908171203029868545,"tonight , I learned my older girl will back he...","tonight , I learned my older girl will back he...",,,,0.0,,
1,1203645589214367745,USER USER I knew diabetes and fibromyalgia wer...,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0.0,,
2,1310596731063525376,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,mS,,,0.0,,
3,1125198453167022085,USER Cheers ! Have one for this diabetic too !,USER Cheers ! Have one for this diabetic too !,mS,,,0.0,,
4,1248600944138268673,USER Additionally the medicines are being char...,USER Additionally the medicines are being char...,,medicines are being charged at MRP,costing much higher,1.0,,


### Interrater-reliabilty measure

In [4]:
from sklearn.metrics import cohen_kappa_score

charline = data[data["Charline association0=no;1=yes"].notnull()]
coder1 = charline["Causal association"].values
coder2 = charline["Charline association0=no;1=yes"]
score = cohen_kappa_score(coder1,coder2)
print('Cohen\'s Kappa:',score)

Cohen's Kappa: 0.823719518166683


### Preprocessing

In [5]:
data["Causal association"].value_counts()

0.0    3720
1.0    1280
Name: Causal association, dtype: int64

In [6]:
# Normalisation for BertTweet
from nltk.tokenize import TweetTokenizer
from emoji import demojize
import re

tokenizer = TweetTokenizer()

# https://huggingface.co/vinai/bertweet-base
def normalizeToken(token):
    lowercased_token = token.lower()
    if token.startswith("@"):
        return "@USER"
    elif lowercased_token.startswith("http") or lowercased_token.startswith("www"):
        return "HTTPURL"
    elif len(token) == 1:
        return demojize(token)
    else:
        if token == "’":
            return "'"
        elif token == "…":
            return "..."
        else:
            return token

def normalizeTweet(tweet):

    tokens = tokenizer.tokenize(tweet.replace("’", "'").replace("…", "..."))
    normTweet = " ".join([normalizeToken(token) for token in tokens])

    normTweet = normTweet.replace("cannot ", "can not ").replace("n't ", " n't ").replace("n 't ", " n't ").replace("ca n't", "can't").replace("ai n't", "ain't")
    normTweet = normTweet.replace("'m ", " 'm ").replace("'re ", " 're ").replace("'s ", " 's ").replace("'ll ", " 'll ").replace("'d ", " 'd ").replace("'ve ", " 've ")
    normTweet = normTweet.replace(" p . m .", "  p.m.") .replace(" p . m ", " p.m ").replace(" a . m .", " a.m.").replace(" a . m ", " a.m ")

    normTweet = re.sub(r",([0-9]{2,4}) , ([0-9]{2,4})", r",\1,\2", normTweet)
    normTweet = re.sub(r"([0-9]{1,3}) / ([0-9]{2,4})", r"\1/\2", normTweet)
    normTweet = re.sub(r"([0-9]{1,3})- ([0-9]{2,4})", r"\1-\2", normTweet)
    
    return " ".join(normTweet.split())

In [7]:
def split_into_sentences(text):
    text = " " + text + "  "
    text = text.replace("\n"," ")
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    sentences = text.split("<stop>")
    #sentences = sentences[:-1]
    sentences = [s.strip() for s in sentences]
    sentences = [s  for s in sentences if s != ""]
    return sentences


def create_training_data(data):
    tweets = []
    causal_labels = []
    
    for i, row in data.iterrows():
#        print("\n")
#        print(row["full_text"])
#        print(row["Intent"], "->", row["Causal association"])
       
        sentences = split_into_sentences(row["full_text"])
        intents = set(str(row["Intent"]).strip().split(";"))
        causes = str(row["Cause"]).strip().split(";")
        effects = str(row["Effect"]).strip().split(";")
#        print("\tintents:", intents)
#        print("\tcauses: '{}'".format(causes))
#        print("\teffects: '{}'".format(effects))
        
        if set({"nan"}) == intents or set({" "}) == intents: # single sentence
#            print("\tA: add => causality: {}".format(row["Causal association"]))
            tweets.append(row["full_text"])
            causal_labels.append(row["Causal association"])

        elif (
             set({"q"}) == intents 
          or set({"joke"}) == intents 
          or set({"q", "joke"}) == intents
          or set({"joke", "mS"}) == intents 
          or set({"neg"}) == intents 
          or set({"neg", "msS"}) == intents
          or set({"neg", "mS"}) == intents
          or set({"neg", "msS", "mE"}) == intents
          or set({"q", "joke", "mS"}) == intents
          or set({"q", "msS", "neg"}) == intents
          or set({"neg", "mC"}) == intents
          or set({"mC", "joke", "msS"}) == intents
          or set({"joke", "mE"}) == intents
        ):
#            print("\tB ignore")
            continue
        elif (  
             set({"mS"}) == intents # multiple sentences (possible that cause and effect in different sentences -> ignore)
          or set({"q", "mS"}) == intents # multiple sentences or question
          or set({"mS", "mE"}) == intents
          or set({"mC", "mS"}) == intents
          or set({"mC", "mS", "mE"}) == intents
          or set({"q", "mC", "mS"}) == intents
          or set({"q", "mC", "mS", "mE"}) == intents
            
        ):
            for sent in sentences:
#                print(sent)
                if sent[-1] != "?": # ignore questions
#                    print("\tC: add, causality => 0")
                    tweets.append(sent)
                    causal_labels.append(0)  
#                else:
#                    print("\tD: ignore")
        elif (
            set({"msS"}) == intents # multiple sentences with cause and effect in single sentence
         or set({"q", "msS"}) == intents # msS and a question
         or set({"msS", "mE"}) == intents # msS with several effects
         or set({"mC", "msS"}) == intents
         or set({"mE"}) == intents # multiple effects
         or set({"mC"}) == intents # multiple causes
         or set({"mC", "msS", "mE"}) == intents
         or set({"mC", "mE"}) == intents
         or set({"q", "mC", "mE"}) == intents   
         or set({"q", "mC", "msS"}) == intents
        ):

            if row["Causal association"] != 1: #TEST
                print(sentences)
                print("1) ERROR: Causal association should be 1 !!!!")      
                print()
        
            for sent in sentences:
#                print("sub sent:", sent)
                if sent[-1] != "?": # ignore question
                    
                    existCause = False
                    for cause in causes:
                        if cause in sent:
#                            print("Cause: <{}> in sent".format(cause))
                            existCause = True
                    
                    existEffect = False
                    for effect in effects:
                        if effect in sent:
#                            print("Effect: <{}> in sent".format(effect))
                            existEffect = True
                            
                    if existCause and existEffect:
                        tweets.append(sent)
                        causal_labels.append(row["Causal association"])
#                        print("E: add with Cause + effect => association: {}".format(row["Causal association"]))
                    else:
                        tweets.append(sent)
                        causal_labels.append(0)
#                        print("F: not both C + E in sentence, association: 0")
#                else:
#                    print("H: question in sentence")
            if row["Causal association"] == 0:
                print(sentences)
                print("H: should not have causality == 0")
#        else:
#            print("END: should not happen")

    
    return pd.DataFrame({"tweet" : tweets, "Causal association" : causal_labels})

trainingData = create_training_data(data)
print("N sentences:", trainingData.shape)
trainingData = trainingData[trainingData["tweet"].str.split(" ").str.len() > 3] # keep only sentence with more than 3 tokens
print("N sentences with > 3 words:", trainingData.shape)

N sentences: (9779, 2)
N sentences with > 3 words: (8235, 2)


In [8]:
# let's print a few example of our cleaned and normalized traingin dataset
trainingData.head()

Unnamed: 0,tweet,Causal association
0,"tonight , I learned my older girl will back he...",0.0
1,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,0.0
2,I'm a trans woman .,0.0
3,"Both of us could use a world where "" brave and...",0.0
4,"Make a world where people can just be , withou...",0.0


In [9]:
trainingData["Causal association"].value_counts()

0.0    7218
1.0    1017
Name: Causal association, dtype: int64

### Training

In [10]:
# text = trainingData["tweet"].map(normalizeTweet).values.tolist()
# labels = trainingData["Causal association"].values.tolist()
# # first split the data into traingin and testing label in the ratio of 80:20
# train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2)
# # split the new training data (80% of actual data) to get train and validation set
# train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2)



# print("Train: {}".format(len(train_texts)))
# print("Val: {}".format(len(val_texts)))
# print("Test: {}".format(len(test_texts)))


In [11]:
## when not straitifyign the data: 

text = trainingData["tweet"].map(normalizeTweet).values.tolist()
labels = trainingData["Causal association"].values.tolist()
# first split the data into training and testing label in the ratio of 80:20
train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2)
# split the new training data (80% of actual data) to get train and validation set
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2)

data_count_info = pd.Series(labels).value_counts(normalize=True)
train_count_info = pd.Series(train_labels).value_counts(normalize=True)
val_count_info = pd.Series(val_labels).value_counts(normalize=True)
test_count_info = pd.Series(test_labels).value_counts(normalize=True)

# for class-imbalanced dataset, the class weight for a ith class
# to be specified for balancing in the loss function is given by:
# weight[i] = num_samples / (num_classes * num_samples[i])
# since train_count_info obtained above has fraction of samples
# for ith class, hence the corresponding weight calculation is:
class_weight = (1/train_count_info)/len(train_count_info)

print("All: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(labels), *data_count_info.round(4).to_list()))
print("Train: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(train_labels), *train_count_info.round(4).to_list()))
print("Val: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(val_labels), *val_count_info.round(4).to_list()))
print("Test: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(test_labels), *test_count_info.round(4).to_list()))
print("Balancing class wts: for 0 = {}, for 1 = {}".format(
    *class_weight.round(4).to_list()))

All: Count = 8235, % of 0 = 0.8765, % of 1 = 0.1235
Train: Count = 5270, % of 0 = 0.8818, % of 1 = 0.1182
Val: Count = 1318, % of 0 = 0.871, % of 1 = 0.129
Test: Count = 1647, % of 0 = 0.864, % of 1 = 0.136
Balancing class wts: for 0 = 0.567, for 1 = 4.2295


In [12]:
####################### Stratified splits ####################

text = trainingData["tweet"].map(normalizeTweet).values.tolist()
labels = trainingData["Causal association"].values.tolist()
# first split the data into training and testing label in the ratio of 80:20
train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2, stratify=labels)
# split the new training data (80% of actual data) to get train and validation set
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2, stratify=train_labels)



data_count_info = pd.Series(labels).value_counts(normalize=True)
train_count_info = pd.Series(train_labels).value_counts(normalize=True)
val_count_info = pd.Series(val_labels).value_counts(normalize=True)
test_count_info = pd.Series(test_labels).value_counts(normalize=True)

# for class-imbalanced dataset, the class weight for a ith class
# to be specified for balancing in the loss function is given by:
# weight[i] = num_samples / (num_classes * num_samples[i])
# since train_count_info obtained above has fraction of samples
# for ith class, hence the corresponding weight calculation is:
class_weight = (1/train_count_info)/len(train_count_info)

print("All: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(labels), *data_count_info.round(4).to_list()))
print("Train: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(train_labels), *train_count_info.round(4).to_list()))
print("Val: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(val_labels), *val_count_info.round(4).to_list()))
print("Test: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(test_labels), *test_count_info.round(4).to_list()))
print("Balancing class wts: for 0 = {}, for 1 = {}".format(
    *class_weight.round(4).to_list()))

All: Count = 8235, % of 0 = 0.8765, % of 1 = 0.1235
Train: Count = 5270, % of 0 = 0.8765, % of 1 = 0.1235
Val: Count = 1318, % of 0 = 0.8763, % of 1 = 0.1237
Test: Count = 1647, % of 0 = 0.8767, % of 1 = 0.1233
Balancing class wts: for 0 = 0.5705, for 1 = 4.0476


In [13]:
# train_texts[1]

In [14]:
# Transform labels + encodings into Pytorch DataSet object (including __len__, __getitem__)
class TweetDataSet(torch.utils.data.Dataset):
    def __init__(self, text, labels, tokenizer):
        self.text = text
        self.labels = labels
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        inputs = self.tokenizer(self.text, padding=True, truncation=True, return_token_type_ids=True)
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]
        return {
                "input_ids" : torch.tensor(ids[idx], dtype=torch.long)
              , "attention_mask" : torch.tensor(mask[idx], dtype=torch.long)
              , "token_type_ids" : torch.tensor(token_type_ids[idx], dtype=torch.long)
              , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
        }      

    def __len__(self):
        return len(self.labels)

    
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")

train_dataset = TweetDataSet(train_texts, train_labels, tokenizer)
val_dataset = TweetDataSet(val_texts, val_labels, tokenizer)
test_dataset = TweetDataSet(test_texts, test_labels, tokenizer)
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))

# put data to batches (iterables)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


5270
1318
1647


In [15]:
# !jupyter nbextension enable --py widgetsnbextension

In [16]:
# # from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
# # from transformers import AutoModelForSequenceClassification
# from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef

# def compute_metrics(pred, labels):
#     #labels = pred.label_ids
#     #preds = pred.predictions.argmax(-1)
#     precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='binary')
#     acc = accuracy_score(labels, pred)
#     return {
#         'accuracy': acc,
#         'f1': f1,
#         'precision': precision,
#         'recall': recall
#     }



# class CausalityBERT(torch.nn.Module):
#     """ Model Bert"""
#     def __init__(self):
#         super(CausalityBERT, self).__init__()
#         self.num_labels = 2
#         self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
#         self.dropout = torch.nn.Dropout(0.3)
#         self.linear = torch.nn.Linear(768, self.num_labels)
#         # softmax layer missing ? -> Vivek
        
#     def forward(self, input_ids, attention_mask, token_type_ids):
#         _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False)
#         output_2 = self.dropout(output_1)
#         logits = self.linear(output_2)
#         return logits


# ## Model parameters
# batchsize_train = 16
# lr = 5e-5
# adam_eps = 1e-8
# epochs = 3 
# num_warmup_steps = 0
# num_training_steps = len(train_loader)*epochs

# # Store our loss and accuracy for plotting
# train_loss_set = []
# learning_rate = []


# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# #model = AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base")
# model = CausalityBERT()
# model.to(device)

# # fine-tune only the task-specific parameters -> Vivek? 
# for param in model.bert.parameters():
#     param.requires_grad = False
    
# model.to(device)
# model.train() # set model to training mode


# optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
# scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) # scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0; after a warmup period durnig which it increases linearly from 0 to the initial lr set in the optimizer
# loss_fn = CrossEntropyLoss()

# for epoch in trange(1, epochs+1, desc='Epoch'):
#     print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")
    
#     #calculate total loss for this epoch
#     batch_loss = 0
    
#     for batch in tqdm(train_loader):
#         #print("batch:", batch)
#         optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
#         input_ids = batch['input_ids'].to(device)
#         attention_mask = batch['attention_mask'].to(device)
#         token_type_ids = batch["token_type_ids"].to(device)
#         labels = batch['labels'].to(device)
#         #logits = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # forward pass
#         logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        
        
        
        
#         loss = loss_fn(logits, labels)
#         print("loss:", loss)
#         #loss = outputs[0]
#         loss.backward() # backward pass
#         optim.step()    # update parameters and take a step up using the computed gradient
#         scheduler.step()# update learning rate scheduler
#         batch_loss += loss.item() # update tracking variables
        
#     avg_train_loss = batch_loss / len(train_loader) # calculate avg loss over training data

#     # store the current learning rate
#     for param_group in optim.param_groups:
#         print("\n\tCurrent Learning rate: ", param_group['lr'])
#         learning_rate.append(param_group['lr'])
    
#     train_loss_set.append(avg_train_loss)
#     print(F'\n\tAverage Training loss: {avg_train_loss}')

    
#     ## ---- Validation ------
#     model.eval() # put model in evaluation mode for validation set
    
#     eval_accuracy, eval_mcc_accuracy, nb_eval_steps = 0, 0, 0 # Tracking variables
#     val_accuracy = []
#     val_loss = []
#     val_acc = []
#     val_prec = []
#     val_rec = []
#     val_f1 = []
    
    
#     # Evaluate data for one epoch
#     for batch in tqdm(validation_loader):
#         batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
#         b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
        
#         with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
#           #  logits = model(b_inputs_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids) # forward pass, calculates logit predictions
#           logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions
            
#         loss = loss_fn(logits, b_labels)
#         val_loss.append(loss.item())
        
#         # move logits and labels to CPU
#         logits = logits.detach().to('cpu').numpy()
#         label_ids = b_labels.to('cpu').numpy()
        
#         pred_flat = np.argmax(logits, axis=1).flatten() # convert logits to list of predicted labels
#         labels_flat = label_ids.flatten()
#         eval_accuracy += accuracy_score(labels_flat, pred_flat)
#         eval_mcc_accrucay += matthews_corrcoef(labels_flat, pred_flat)        
#         metrics = compute_metrics(pred_flat, labels_flat)
#         val_acc.append(metrics["accuracy"])
#         val_prec.append(metrics["precision"])
#         val_rec.append(metrics["recall"])
#         val_f1.append(metrics["f1"])
#         nb_eval_steps += 1
        
#     print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation MCC Accuracy: {eval_mcc_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation loss: {np.mean(val_loss)}')
#     print(F'\n\tValidation acc: {np.mean(val_acc)}')
#     print(F'\n\tValidation prec: {np.mean(val_prec)}')
#     print(F'\n\tValidation rec: {np.mean(val_rec)}')
#     print(F'\n\tValidation f1: {np.mean(val_f1)}')

    


In [17]:
# from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
# from transformers import AutoModelForSequenceClassification

## we are measuring weighted metrics - as our dataset is unbalanced 
# Calculate metrics for each label, and find their average weighted by support
#(the number of true instances for each label). 
#This alters ‘macro’ to account for label imbalance; 
# it can result in an F-score that is not between precision and recall.


from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef

def compute_metrics(pred, labels):
    #labels = pred.label_ids
    #preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='weighted')
    acc = accuracy_score(labels, pred)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }



class CausalityBERT(torch.nn.Module):
    """ Model Bert"""
    def __init__(self):
        super(CausalityBERT, self).__init__()
        self.num_labels = 2
        self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
        self.dropout = torch.nn.Dropout(0.3)
        self.linear1 = torch.nn.Linear(768, 256)
        self.linear2 = torch.nn.Linear(256, self.num_labels)
        self.softmax = torch.nn.Softmax(-1)
        
    def forward(self, input_ids, attention_mask, token_type_ids):
        _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False) # if output 1 is our cls token        
        output_2 = self.dropout(output_1)
        output_3 = self.linear1(output_2)  
        output_4 = self.dropout(output_3)
        output_5 = self.linear2(output_4)
        # cross entory will take care of the logits - we don't need if we are usign cross entropy for loss function 
        # if doing yourself - use nll loss and logSoftmax 
#         logit = self.softmax(output_5)
        return output_5



    


## Model parameters


In [18]:
batchsize_train = 16
lr = 5e-5
adam_eps = 1e-8
epochs = 35
num_warmup_steps = 0
num_training_steps = len(train_loader)*epochs

In [19]:


# Store our loss and accuracy for plotting : where is accuracy : it is loss an dlearning rate 
train_loss_set = []
learning_rate = []


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = CausalityBERT()
model.to(device)

# fine-tune only the task-specific parameters -> Vivek? 
## can we just update last layer of BERT 
for param in model.bert.parameters():
    param.requires_grad = False


optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) # scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0; after a warmup period durnig which it increases linearly from 0 to the initial lr set in the optimizer
# loss_fn = CrossEntropyLoss()

## penalising more for class with less number of exaplmes 
loss_fn = CrossEntropyLoss(torch.tensor(class_weight.to_list()).to(device))


You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.0.output.dense.weight', 'roberta.embeddings.token_type_embeddings.weight', 'roberta.encoder.layer.9.output.dense.weight', 'roberta.encoder.layer.5.attention.self.key.weight', 'roberta.encoder.layer.7.attention.self.query.bias', 'roberta.encoder.layer.0.attention.self.key.bias', 'roberta.encoder.layer.7.intermediate.dense.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.6.attention.self.key.weight', 'lm_head.bias', 'roberta.encoder.layer.6.attention.self.query.bias', 'roberta.encoder.layer.11.attention.self.query.weight', 'roberta.embeddings.LayerNorm.weight', 'roberta.encoder.layer.4.intermediate.dense.weight', 'roberta.encoder.layer.0.attention.output.dense.bias', 'robe

In [20]:



for epoch in trange(1, epochs+1, desc='Epoch'):
    print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")
    
    #calculate total loss for this epoch
#     epoch_loss = 0
    
    ########### training eval metrics #############################
    tr_accuracy, tr_mcc_accuracy, nb_tr_steps = 0, 0, 0 # Tracking variables
    train_accuracy = []
    train_loss = []
    train_acc = []
    train_prec = []
    train_rec = []
    train_f1 = []
    
    ###################################################
    
    for batch in tqdm(train_loader):
        #print("batch:", batch)
        optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch["token_type_ids"].to(device)
        labels = batch['labels'].to(device)
        #logits = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # forward pass
#         outputs = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        ###########################################################################
        model.train()
        ## this is output_5 based on our model defined above 
        logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        #############################################################################
        loss = loss_fn(logits, labels)
        print("loss:", loss)
        #loss = outputs[0]
        loss.backward() # backward pass
        optim.step()    # update parameters and take a step up using the computed gradient
        scheduler.step()# update learning rate scheduler
#         epoch_loss += loss.item() # update tracking variables
        train_loss.append(loss.item())
        
    
    ############# Training Accuracy Measure ##########################################
#         loss = loss_fn(logits, b_labels)
#         val_loss.append(loss.item())

        # move logits and labels to CPU
        logits = logits.detach().to('cpu').numpy()
        label_ids = labels.to('cpu').numpy()

        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()

# #         tr_accuracy += accuracy_score(labels_flat, pred_flat) # this is same as metric accuracy 
#         tr_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  


        metrics = compute_metrics(pred_flat, labels_flat)
#         print(metrics)
        
        
#         train_loss.append(loss)
        train_acc.append(metrics["accuracy"])
        train_prec.append(metrics["precision"])
        train_rec.append(metrics["recall"])
        train_f1.append(metrics["f1"])
        
#         batch_loss
#         nb_tr_steps += 1
        
#     batch_loss = np.mean(train_loss)
#     train_loss_set.append(batch_loss)
        
    
#     print(F'\n\tTraining Accuracy: {tr_accuracy / nb_tr_steps}') # accuracy is calculated twice 
#     print(F'\n\tTraining MCC Accuracy: {tr_mcc_accuracy / nb_tr_steps}') # eval M
    
#     train_loss_set.append(batch_loss) # it should be either loss or batch_loss
    
    
#     train_loss = train_loss.to('cpu').numpy()
    print(F'\n\tTrain loss: {np.mean(train_loss)}')
    print(F'\n\ttrain acc: {np.mean(train_acc)}')
    print(F'\n\ttraining prec: {np.mean(train_prec)}')
    print(F'\n\ttraining rec: {np.mean(train_rec)}')
    print(F'\n\ttraining f1: {np.mean(train_f1)}')
    
    
    ###################################################################################
    #     avg_train_loss = batch_loss / len(train_loader) # calculate avg loss over training data : we don't need it as you did it for each sample
    #     train_loss_set.append(avg_train_loss)
    #     print(F'\n\tAverage Training loss: {avg_train_loss}')
    
#     train_loss_set.append(train_loss)
#     print(F'\n\tAverage Training loss per epoch: {train_loss}')


    # store the current learning rate
    for param_group in optim.param_groups:
        print("\n\tCurrent Learning rate: ", param_group['lr'])
        learning_rate.append(param_group['lr'])
    


    
    ## ---- Validation ------
#     model.eval() # put model in evaluation mode for validation set
    
    eval_accuracy, eval_mcc_accuracy, nb_eval_steps = 0, 0, 0 # Tracking variables
    val_accuracy = []
    val_loss = []
    val_acc = []
    val_prec = []
    val_rec = []
    val_f1 = []
    
    
    # Evaluate data for one epoch
    for batch in tqdm(validation_loader):
        batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
        b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
        
        with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
          #  logits = model(b_inputs_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids) # forward pass, calculates logit predictions
#             outputs = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
            ##################################################################################
            model.eval()
            logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 

                        
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())
        
        # move logits and labels to CPU
        logits = logits.detach().to('cpu').numpy()
        label_ids = b_labels.to('cpu').numpy()
        
        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()
        
#         eval_accuracy += accuracy_score(labels_flat, pred_flat)
#         eval_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  
        
        
        metrics = compute_metrics(pred_flat, labels_flat)
        val_acc.append(metrics["accuracy"])
        val_prec.append(metrics["precision"])
        val_rec.append(metrics["recall"])
        val_f1.append(metrics["f1"])
#         nb_eval_steps += 1
        
#     print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation MCC Accuracy: {eval_mcc_accuracy / nb_eval_steps}') # eval M
    
#     val_loss = val_loss.to('cpu').numpy()
    print(F'\n\tValidation loss: {np.mean(val_loss)}')
    print(F'\n\tValidation acc: {np.mean(val_acc)}')
    print(F'\n\tValidation prec: {np.mean(val_prec)}')
    print(F'\n\tValidation rec: {np.mean(val_rec)}')
    print(F'\n\tValidation f1: {np.mean(val_f1)}')


Epoch:   0%|          | 0/35 [00:00<?, ?it/s]
  0%|          | 0/83 [00:00<?, ?it/s][A



  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)


loss: 


  1%|          | 1/83 [00:22<30:16, 22.15s/it][A

tensor(0.6647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:04, 21.54s/it][A

tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:06, 21.09s/it][A

tensor(0.7044, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:25, 20.83s/it][A

tensor(0.7387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<26:55, 20.72s/it][A

tensor(0.7392, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:29, 20.65s/it][A

tensor(0.6936, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:02, 20.56s/it][A

tensor(0.7390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<25:37, 20.51s/it][A

tensor(0.7089, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:16, 20.50s/it][A

tensor(0.6727, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:26<24:53, 20.46s/it][A

tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:32, 20.45s/it][A

tensor(0.6610, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:07<24:09, 20.41s/it][A

tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:28<23:48, 20.40s/it][A

tensor(0.6753, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:48<23:26, 20.39s/it][A

tensor(0.7209, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:08<23:05, 20.38s/it][A

tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:29<22:45, 20.39s/it][A

tensor(0.6976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:49<22:24, 20.38s/it][A

tensor(0.7309, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:09<22:04, 20.38s/it][A

tensor(0.7028, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:30<21:43, 20.37s/it][A

tensor(0.7216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:50<21:23, 20.38s/it][A

tensor(0.7195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:11<21:03, 20.38s/it][A

tensor(0.6887, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:31<20:43, 20.38s/it][A

tensor(0.7017, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:51<20:22, 20.38s/it][A

tensor(0.7253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:12<20:03, 20.39s/it][A

tensor(0.7256, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:32<19:42, 20.38s/it][A

tensor(0.6915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:52<19:22, 20.39s/it][A

tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:13<19:01, 20.38s/it][A

tensor(0.6826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:33<18:40, 20.38s/it][A

tensor(0.6744, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:53<18:19, 20.35s/it][A

tensor(0.6942, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:14<17:59, 20.37s/it][A

tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:34<17:39, 20.38s/it][A

tensor(0.6613, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:55<17:20, 20.40s/it][A

tensor(0.6760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:15<17:01, 20.42s/it][A

tensor(0.7043, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:36<16:40, 20.42s/it][A

tensor(0.6896, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:56<16:18, 20.38s/it][A

tensor(0.7143, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:16<15:56, 20.35s/it][A

tensor(0.6873, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:36<15:35, 20.33s/it][A

tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:57<15:16, 20.36s/it][A

tensor(0.7034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:17<14:57, 20.39s/it][A

tensor(0.6382, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:38<14:37, 20.41s/it][A

tensor(0.6867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:58<14:17, 20.42s/it][A

tensor(0.7072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:19<13:57, 20.42s/it][A

tensor(0.6830, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:39<13:36, 20.42s/it][A

tensor(0.7248, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:00<13:16, 20.43s/it][A

tensor(0.6685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:20<12:55, 20.40s/it][A

tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:40<12:34, 20.40s/it][A

tensor(0.6399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:01<12:13, 20.38s/it][A

tensor(0.7513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:21<11:53, 20.38s/it][A

tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:41<11:33, 20.39s/it][A

tensor(0.6587, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:02<11:13, 20.40s/it][A

tensor(0.6806, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:22<10:53, 20.42s/it][A

tensor(0.7285, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:43<10:31, 20.38s/it][A

tensor(0.6760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:03<10:11, 20.38s/it][A

tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:23<09:50, 20.35s/it][A

tensor(0.7299, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:44<09:29, 20.34s/it][A

tensor(0.6680, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:04<09:09, 20.36s/it][A

tensor(0.6461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:24<08:49, 20.35s/it][A

tensor(0.6797, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:45<08:29, 20.36s/it][A

tensor(0.6833, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:05<08:09, 20.38s/it][A

tensor(0.6793, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:25<07:48, 20.36s/it][A

tensor(0.7052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:46<07:28, 20.37s/it][A

tensor(0.6326, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:06<07:07, 20.37s/it][A

tensor(0.6637, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:27<06:47, 20.38s/it][A

tensor(0.7054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:47<06:27, 20.38s/it][A

tensor(0.6815, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:07<06:06, 20.37s/it][A

tensor(0.6963, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:28<05:46, 20.36s/it][A

tensor(0.6644, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:48<05:25, 20.35s/it][A

tensor(0.7337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:08<05:04, 20.30s/it][A

tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:28<04:44, 20.30s/it][A

tensor(0.6597, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:49<04:23, 20.26s/it][A

tensor(0.6987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:09<04:02, 20.23s/it][A

tensor(0.6810, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:29<03:42, 20.22s/it][A

tensor(0.6833, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:49<03:22, 20.21s/it][A

tensor(0.6537, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:09<03:01, 20.21s/it][A

tensor(0.7144, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:30<02:41, 20.20s/it][A

tensor(0.6678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:50<02:21, 20.18s/it][A

tensor(0.6714, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:10<02:01, 20.18s/it][A

tensor(0.6745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:30<01:40, 20.19s/it][A

tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:50<01:20, 20.22s/it][A

tensor(0.7006, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:11<01:00, 20.23s/it][A

tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:31<00:40, 20.20s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:51<00:20, 20.19s/it][A

tensor(0.6544, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:58<00:00, 20.22s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7038, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6900872580976372

	train acc: 0.5572460295728368

	training prec: 0.7996811081319173

	training rec: 0.5572460295728368

	training f1: 0.613270272716099

	Current Learning rate:  4.8571428571428576e-05



  2%|▏         | 1/42 [00:02<01:49,  2.66s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.60s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.60s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.61s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.61s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.60s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.60s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.58s/it][A
 48%|████▊     | 20/42 [00:52<00:56,  2


	Validation loss: 0.6646892300673893

	Validation acc: 0.7061011904761905

	Validation prec: 0.8485253660632068

	Validation rec: 0.7061011904761905

	Validation f1: 0.7548689303045351
loss: 


  1%|          | 1/83 [00:21<28:44, 21.03s/it][A

tensor(0.7084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:19, 20.98s/it][A

tensor(0.7117, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:59, 20.99s/it][A

tensor(0.6908, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:40, 21.02s/it][A

tensor(0.6863, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:19, 21.02s/it][A

tensor(0.6309, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<26:59, 21.03s/it][A

tensor(0.6899, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:39, 21.05s/it][A

tensor(0.6530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:18, 21.05s/it][A

tensor(0.6651, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:09<25:58, 21.06s/it][A

tensor(0.6954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:30<25:36, 21.04s/it][A

tensor(0.6588, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:51<25:14, 21.03s/it][A

tensor(0.6093, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:12<24:53, 21.03s/it][A

tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:20, 20.86s/it][A

tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:51<23:22, 20.32s/it][A

tensor(0.7018, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:12<23:14, 20.51s/it][A

tensor(0.6560, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:33<23:04, 20.66s/it][A

tensor(0.6424, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:54<22:48, 20.74s/it][A

tensor(0.7225, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:15<22:33, 20.83s/it][A

tensor(0.6489, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:16, 20.88s/it][A

tensor(0.6411, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:57<21:57, 20.91s/it][A

tensor(0.7453, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:17<21:11, 20.50s/it][A

tensor(0.6268, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:38<21:00, 20.67s/it][A

tensor(0.6670, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:59<20:45, 20.75s/it][A

tensor(0.6331, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:20<20:28, 20.82s/it][A

tensor(0.6775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:41<20:09, 20.86s/it][A

tensor(0.6689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:02<19:51, 20.90s/it][A

tensor(0.6416, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:23<19:31, 20.92s/it][A

tensor(0.6958, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:43<19:05, 20.83s/it][A

tensor(0.6868, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:04<18:48, 20.89s/it][A

tensor(0.6836, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:25<18:30, 20.95s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:46<18:09, 20.95s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:07<17:44, 20.88s/it][A

tensor(0.7032, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:28<17:22, 20.84s/it][A

tensor(0.6219, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:48<16:52, 20.67s/it][A

tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:09<16:36, 20.75s/it][A

tensor(0.6534, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:30<16:18, 20.82s/it][A

tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:51<15:59, 20.86s/it][A

tensor(0.6781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:12<15:39, 20.87s/it][A

tensor(0.6555, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:33<15:19, 20.89s/it][A

tensor(0.6853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:54<14:58, 20.89s/it][A

tensor(0.6904, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:15<14:38, 20.92s/it][A

tensor(0.6546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:35<14:14, 20.84s/it][A

tensor(0.6713, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:56<13:54, 20.87s/it][A

tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:17<13:34, 20.88s/it][A

tensor(0.6859, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:38<13:15, 20.94s/it][A

tensor(0.6724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:59<12:54, 20.92s/it][A

tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:20<12:33, 20.93s/it][A

tensor(0.6241, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:41<12:12, 20.92s/it][A

tensor(0.6558, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:02<11:50, 20.91s/it][A

tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:23<11:29, 20.89s/it][A

tensor(0.6533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:44<11:08, 20.89s/it][A

tensor(0.7080, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:04<10:47, 20.87s/it][A

tensor(0.6784, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:25<10:26, 20.88s/it][A

tensor(0.6160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:46<10:04, 20.86s/it][A

tensor(0.6745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:07<09:44, 20.87s/it][A

tensor(0.6863, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:28<09:21, 20.81s/it][A

tensor(0.6706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:49<09:01, 20.83s/it][A

tensor(0.6463, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:10<08:41, 20.85s/it][A

tensor(0.6946, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:31<08:21, 20.89s/it][A

tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:51<08:00, 20.88s/it][A

tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:12<07:39, 20.87s/it][A

tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:33<07:17, 20.85s/it][A

tensor(0.6458, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:54<06:57, 20.86s/it][A

tensor(0.6983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:15<06:36, 20.86s/it][A

tensor(0.6387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:36<06:15, 20.88s/it][A

tensor(0.6401, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:57<05:54, 20.88s/it][A

tensor(0.6812, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:18<05:34, 20.92s/it][A

tensor(0.6389, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:39<05:14, 20.94s/it][A

tensor(0.6829, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:00<04:53, 20.95s/it][A

tensor(0.6262, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:20<04:32, 20.95s/it][A

tensor(0.6430, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:41<04:11, 20.92s/it][A

tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:02<03:50, 20.93s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:23<03:29, 20.94s/it][A

tensor(0.6552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:44<03:08, 20.94s/it][A

tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:05<02:47, 20.95s/it][A

tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:26<02:26, 20.93s/it][A

tensor(0.6510, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:47<02:05, 20.92s/it][A

tensor(0.6540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:08<01:44, 20.94s/it][A

tensor(0.6957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:29<01:23, 20.98s/it][A

tensor(0.6751, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:50<01:02, 20.95s/it][A

tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:11<00:41, 20.95s/it][A

tensor(0.6901, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:31<00:20, 20.59s/it][A

tensor(0.6790, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:38<00:00, 20.70s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6883, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6672795924795679

	train acc: 0.6387082420591457

	training prec: 0.8259786187706238

	training rec: 0.6387082420591457

	training f1: 0.6885136210083882

	Current Learning rate:  4.714285714285714e-05



  2%|▏         | 1/42 [00:02<01:49,  2.66s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.62s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.61s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.62s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.61s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.62s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.60s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6435686151186625

	Validation acc: 0.716765873015873

	Validation prec: 0.8506495746158422

	Validation rec: 0.716765873015873

	Validation f1: 0.763243362250044
loss: 


  1%|          | 1/83 [00:20<28:32, 20.88s/it][A

tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:10, 20.87s/it][A

tensor(0.6163, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:50, 20.88s/it][A

tensor(0.7138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:28, 20.87s/it][A

tensor(0.6858, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:08, 20.88s/it][A

tensor(0.6224, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:47, 20.87s/it][A

tensor(0.6619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:28, 20.90s/it][A

tensor(0.6804, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:09, 20.92s/it][A

tensor(0.7272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:47, 20.92s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:26, 20.90s/it][A

tensor(0.6418, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:05, 20.91s/it][A

tensor(0.6310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:43, 20.90s/it][A

tensor(0.6186, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:21, 20.88s/it][A

tensor(0.6594, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<24:01, 20.89s/it][A

tensor(0.6511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:39, 20.88s/it][A

tensor(0.6612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:18, 20.88s/it][A

tensor(0.6716, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<22:58, 20.89s/it][A

tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:39, 20.91s/it][A

tensor(0.6653, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:35<21:47, 20.42s/it][A

tensor(0.6967, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:56<21:35, 20.56s/it][A

tensor(0.6337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:17<21:21, 20.67s/it][A

tensor(0.6797, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:38<21:04, 20.73s/it][A

tensor(0.6925, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:58<20:46, 20.78s/it][A

tensor(0.6378, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:19<20:29, 20.83s/it][A

tensor(0.6674, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:39<19:47, 20.47s/it][A

tensor(0.6840, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:00<19:33, 20.60s/it][A

tensor(0.6621, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:21<19:21, 20.74s/it][A

tensor(0.7023, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:42<19:03, 20.79s/it][A

tensor(0.6656, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:03<18:45, 20.85s/it][A

tensor(0.6452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:24<18:25, 20.85s/it][A

tensor(0.6306, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:45<18:05, 20.87s/it][A

tensor(0.6436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:06<17:46, 20.91s/it][A

tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:27<17:26, 20.92s/it][A

tensor(0.6572, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:48<17:05, 20.93s/it][A

tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:08<16:44, 20.93s/it][A

tensor(0.6867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:29<16:24, 20.95s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:50<16:02, 20.92s/it][A

tensor(0.5773, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:11<15:41, 20.92s/it][A

tensor(0.6532, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:32<15:21, 20.95s/it][A

tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:53<15:01, 20.97s/it][A

tensor(0.6934, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:14<14:39, 20.93s/it][A

tensor(0.6336, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:35<14:17, 20.92s/it][A

tensor(0.7052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:56<13:55, 20.89s/it][A

tensor(0.6822, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:17<13:34, 20.89s/it][A

tensor(0.6060, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:38<13:13, 20.89s/it][A

tensor(0.6403, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:58<12:52, 20.87s/it][A

tensor(0.6435, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:19<12:31, 20.86s/it][A

tensor(0.6615, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:40<12:10, 20.87s/it][A

tensor(0.6393, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:01<11:49, 20.87s/it][A

tensor(0.6327, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:22<11:29, 20.91s/it][A

tensor(0.6853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:43<11:09, 20.91s/it][A

tensor(0.6352, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:04<10:48, 20.92s/it][A

tensor(0.6299, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:25<10:27, 20.92s/it][A

tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:46<10:06, 20.92s/it][A

tensor(0.6482, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:07<09:46, 20.94s/it][A

tensor(0.5988, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:28<09:25, 20.96s/it][A

tensor(0.5991, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:49<09:05, 20.97s/it][A

tensor(0.6256, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:10<08:43, 20.94s/it][A

tensor(0.6987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:31<08:22, 20.93s/it][A

tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:51<08:00, 20.90s/it][A

tensor(0.6815, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:12<07:40, 20.91s/it][A

tensor(0.6485, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:33<07:19, 20.94s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:54<06:58, 20.91s/it][A

tensor(0.6556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:15<06:37, 20.93s/it][A

tensor(0.6510, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:36<06:16, 20.89s/it][A

tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:57<05:55, 20.89s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:18<05:34, 20.90s/it][A

tensor(0.6380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:38<05:09, 20.60s/it][A

tensor(0.6167, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:58<04:45, 20.40s/it][A

tensor(0.6575, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:18<04:24, 20.31s/it][A

tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:38<04:03, 20.27s/it][A

tensor(0.6361, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:58<03:42, 20.25s/it][A

tensor(0.6719, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:18<03:22, 20.20s/it][A

tensor(0.6077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:38<03:01, 20.21s/it][A

tensor(0.6717, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:59<02:41, 20.23s/it][A

tensor(0.6575, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:19<02:21, 20.26s/it][A

tensor(0.8032, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:39<02:01, 20.26s/it][A

tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:00<01:41, 20.28s/it][A

tensor(0.6529, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:20<01:21, 20.29s/it][A

tensor(0.6298, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:40<01:00, 20.27s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:00<00:40, 20.30s/it][A

tensor(0.6443, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:21<00:20, 20.27s/it][A

tensor(0.6890, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:28<00:00, 20.58s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6533842000616602

	train acc: 0.6496269167579409

	training prec: 0.8357832676839093

	training rec: 0.6496269167579409

	training f1: 0.7024698702560601

	Current Learning rate:  4.5714285714285716e-05



  2%|▏         | 1/42 [00:02<01:42,  2.49s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.52s/it][A
  7%|▋         | 3/42 [00:07<01:37,  2.51s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.53s/it][A
 14%|█▍        | 6/42 [00:15<01:30,  2.51s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.52s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.53s/it][A
 21%|██▏       | 9/42 [00:22<01:22,  2.51s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.53s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.53s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.51s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.52s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.53s/it][A
 36%|███▌      | 15/42 [00:37<01:07,  2.52s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.53s/it][A
 40%|████      | 17/42 [00:42<01:03,  2.53s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.51s/it][A
 45%|████▌     | 19/42 [00:47<00:57,  2.52s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation loss: 0.6344635429836455

	Validation acc: 0.5972222222222222

	Validation prec: 0.8677431108130568

	Validation rec: 0.5972222222222222

	Validation f1: 0.6676898825120622
loss: 


  1%|          | 1/83 [00:20<27:59, 20.49s/it][A

tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:36, 20.45s/it][A

tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:17, 20.47s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:21<26:56, 20.47s/it][A

tensor(0.6968, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:42<26:33, 20.43s/it][A

tensor(0.5855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:02<26:10, 20.40s/it][A

tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:22<25:49, 20.39s/it][A

tensor(0.6082, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:43<25:28, 20.38s/it][A

tensor(0.6641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:03<25:07, 20.37s/it][A

tensor(0.6567, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:23<24:44, 20.34s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:44<24:23, 20.33s/it][A

tensor(0.6242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:04<24:02, 20.31s/it][A

tensor(0.6783, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:24<23:42, 20.32s/it][A

tensor(0.6763, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:45<23:21, 20.31s/it][A

tensor(0.6760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:05<23:00, 20.31s/it][A

tensor(0.6327, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:25<22:40, 20.31s/it][A

tensor(0.6880, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:46<22:21, 20.32s/it][A

tensor(0.5996, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:06<21:59, 20.30s/it][A

tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:26<21:39, 20.31s/it][A

tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:47<21:20, 20.33s/it][A

tensor(0.6532, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:07<20:59, 20.32s/it][A

tensor(0.6895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:27<20:39, 20.32s/it][A

tensor(0.6298, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:47<20:18, 20.31s/it][A

tensor(0.6926, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:08<19:58, 20.31s/it][A

tensor(0.6775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:28<19:38, 20.31s/it][A

tensor(0.6145, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:48<19:14, 20.25s/it][A

tensor(0.6594, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:08<18:49, 20.18s/it][A

tensor(0.5978, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:28<18:29, 20.17s/it][A

tensor(0.6203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:48<18:08, 20.16s/it][A

tensor(0.6270, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:09<17:46, 20.12s/it][A

tensor(0.6662, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:29<17:25, 20.10s/it][A

tensor(0.6322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:49<17:04, 20.09s/it][A

tensor(0.6244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:09<16:44, 20.09s/it][A

tensor(0.6633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:29<16:23, 20.08s/it][A

tensor(0.5980, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:49<16:02, 20.06s/it][A

tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:09<15:43, 20.07s/it][A

tensor(0.6517, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:29<15:22, 20.05s/it][A

tensor(0.6091, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:49<15:02, 20.06s/it][A

tensor(0.6667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:09<14:41, 20.04s/it][A

tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:29<14:22, 20.06s/it][A

tensor(0.6611, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:49<14:02, 20.05s/it][A

tensor(0.5714, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:09<13:43, 20.08s/it][A

tensor(0.6119, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:29<13:22, 20.06s/it][A

tensor(0.6000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:49<13:02, 20.07s/it][A

tensor(0.6115, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:09<12:41, 20.04s/it][A

tensor(0.7415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:29<12:21, 20.04s/it][A

tensor(0.5982, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:49<12:01, 20.05s/it][A

tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:09<11:41, 20.05s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:30<11:21, 20.05s/it][A

tensor(0.7038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:50<11:02, 20.07s/it][A

tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:10<10:43, 20.10s/it][A

tensor(0.6091, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:30<10:22, 20.08s/it][A

tensor(0.5848, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:50<10:02, 20.08s/it][A

tensor(0.6478, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:10<09:42, 20.08s/it][A

tensor(0.5818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:30<09:21, 20.07s/it][A

tensor(0.6821, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [18:50<09:02, 20.08s/it][A

tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:10<08:42, 20.09s/it][A

tensor(0.7160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:30<08:22, 20.09s/it][A

tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [19:50<08:02, 20.09s/it][A

tensor(0.5780, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:11<07:42, 20.09s/it][A

tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:31<07:22, 20.13s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [20:51<07:02, 20.12s/it][A

tensor(0.6389, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:11<06:42, 20.10s/it][A

tensor(0.7242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:31<06:21, 20.09s/it][A

tensor(0.5844, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [21:51<06:01, 20.07s/it][A

tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:11<05:40, 20.06s/it][A

tensor(0.6839, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:31<05:21, 20.08s/it][A

tensor(0.6555, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [22:51<05:00, 20.06s/it][A

tensor(0.6825, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:12<04:44, 20.33s/it][A

tensor(0.6316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:33<04:26, 20.51s/it][A

tensor(0.6157, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [23:54<04:08, 20.68s/it][A

tensor(0.6120, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:15<03:48, 20.78s/it][A

tensor(0.6630, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:36<03:28, 20.88s/it][A

tensor(0.6391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [24:57<03:08, 20.92s/it][A

tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:18<02:47, 20.96s/it][A

tensor(0.6573, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:39<02:26, 20.97s/it][A

tensor(0.6738, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:00<02:05, 20.99s/it][A

tensor(0.5936, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:21<01:44, 20.97s/it][A

tensor(0.6008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:42<01:23, 21.00s/it][A

tensor(0.6776, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:03<01:02, 20.97s/it][A

tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:23<00:41, 20.73s/it][A

tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:44<00:20, 20.82s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:52<00:00, 20.15s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6440159355301455

	train acc: 0.6470084884994524

	training prec: 0.8414113485507522

	training rec: 0.6470084884994524

	training f1: 0.7047433749746383

	Current Learning rate:  4.428571428571428e-05



  2%|▏         | 1/42 [00:02<01:46,  2.59s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.63s/it][A
  7%|▋         | 3/42 [00:07<01:37,  2.51s/it][A
 10%|▉         | 4/42 [00:09<01:32,  2.44s/it][A
 12%|█▏        | 5/42 [00:12<01:30,  2.44s/it][A
 14%|█▍        | 6/42 [00:14<01:27,  2.42s/it][A
 17%|█▋        | 7/42 [00:17<01:24,  2.42s/it][A
 19%|█▉        | 8/42 [00:19<01:24,  2.49s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.54s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.56s/it][A
 26%|██▌       | 11/42 [00:27<01:20,  2.59s/it][A
 29%|██▊       | 12/42 [00:30<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:32<01:15,  2.60s/it][A
 33%|███▎      | 14/42 [00:35<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:38<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:40<01:07,  2.61s/it][A
 40%|████      | 17/42 [00:43<01:05,  2.62s/it][A
 43%|████▎     | 18/42 [00:46<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:48<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:51<00:57,  2


	Validation loss: 0.6287299195925394

	Validation acc: 0.5565476190476191

	Validation prec: 0.8740474884528886

	Validation rec: 0.5565476190476191

	Validation f1: 0.6275183161706055
loss: 


  1%|          | 1/83 [00:18<25:36, 18.74s/it][A

tensor(0.6217, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:37<25:20, 18.77s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [00:57<25:32, 19.16s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:18<26:16, 19.96s/it][A

tensor(0.6792, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:38<25:55, 19.95s/it][A

tensor(0.6330, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [01:59<26:05, 20.34s/it][A

tensor(0.6215, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:20<26:03, 20.57s/it][A

tensor(0.6447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:41<25:53, 20.71s/it][A

tensor(0.5932, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:02<25:40, 20.82s/it][A

tensor(0.5667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:23<25:25, 20.90s/it][A

tensor(0.6144, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:44<25:08, 20.95s/it][A

tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:05<24:53, 21.03s/it][A

tensor(0.6592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:26<24:33, 21.05s/it][A

tensor(0.6184, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:47<24:12, 21.05s/it][A

tensor(0.5906, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:09<23:55, 21.10s/it][A

tensor(0.6316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:30<23:29, 21.04s/it][A

tensor(0.7436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:51<23:10, 21.06s/it][A

tensor(0.6556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:47, 21.04s/it][A

tensor(0.6332, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:33<22:26, 21.04s/it][A

tensor(0.5762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:54<22:03, 21.02s/it][A

tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:15<21:43, 21.02s/it][A

tensor(0.6429, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:36<21:21, 21.01s/it][A

tensor(0.5511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:57<21:01, 21.02s/it][A

tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:18<20:38, 21.00s/it][A

tensor(0.7255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:38<20:06, 20.80s/it][A

tensor(0.5938, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:59<19:43, 20.77s/it][A

tensor(0.6341, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:19<19:21, 20.74s/it][A

tensor(0.6197, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:40<19:06, 20.84s/it][A

tensor(0.5860, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:02<18:48, 20.90s/it][A

tensor(0.7089, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:23<18:30, 20.95s/it][A

tensor(0.6497, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:44<18:10, 20.98s/it][A

tensor(0.6141, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:05<17:48, 20.96s/it][A

tensor(0.6524, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:26<17:28, 20.98s/it][A

tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:47<17:08, 20.98s/it][A

tensor(0.6045, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:07<16:45, 20.95s/it][A

tensor(0.6524, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:28<16:25, 20.97s/it][A

tensor(0.6865, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:50<16:05, 21.00s/it][A

tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:10<15:44, 20.98s/it][A

tensor(0.6024, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:31<15:23, 20.99s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:52<15:02, 20.99s/it][A

tensor(0.6547, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:13<14:35, 20.83s/it][A

tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:33<14:07, 20.68s/it][A

tensor(0.5782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:54<13:43, 20.59s/it][A

tensor(0.6408, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:14<13:19, 20.51s/it][A

tensor(0.6785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:34<12:56, 20.44s/it][A

tensor(0.5428, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:55<12:35, 20.43s/it][A

tensor(0.6108, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:15<12:14, 20.42s/it][A

tensor(0.6138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:35<11:53, 20.39s/it][A

tensor(0.7043, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:56<11:33, 20.39s/it][A

tensor(0.6473, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:16<11:13, 20.41s/it][A

tensor(0.6750, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:37<10:52, 20.38s/it][A

tensor(0.6050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:57<10:31, 20.38s/it][A

tensor(0.5854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:17<10:10, 20.36s/it][A

tensor(0.6131, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:38<09:50, 20.35s/it][A

tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:58<09:29, 20.35s/it][A

tensor(0.6417, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:18<09:09, 20.37s/it][A

tensor(0.5546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:38<08:48, 20.32s/it][A

tensor(0.6121, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:59<08:27, 20.29s/it][A

tensor(0.5528, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:19<08:06, 20.27s/it][A

tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:39<07:46, 20.27s/it][A

tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:00<07:26, 20.28s/it][A

tensor(0.7025, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:20<07:06, 20.30s/it][A

tensor(0.7040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:40<06:45, 20.28s/it][A

tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:00<06:25, 20.28s/it][A

tensor(0.6366, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:21<06:05, 20.31s/it][A

tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:41<05:45, 20.33s/it][A

tensor(0.6659, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:01<05:25, 20.34s/it][A

tensor(0.6556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:22<05:05, 20.37s/it][A

tensor(0.5869, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:42<04:44, 20.35s/it][A

tensor(0.6196, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:02<04:24, 20.33s/it][A

tensor(0.6232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:23<04:03, 20.30s/it][A

tensor(0.6320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:43<03:43, 20.29s/it][A

tensor(0.6070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:03<03:22, 20.28s/it][A

tensor(0.6081, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:24<03:02, 20.31s/it][A

tensor(0.6012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:44<02:42, 20.30s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:04<02:22, 20.30s/it][A

tensor(0.6411, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:25<02:01, 20.32s/it][A

tensor(0.6337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:45<01:41, 20.32s/it][A

tensor(0.5936, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:05<01:21, 20.30s/it][A

tensor(0.6244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:25<01:00, 20.30s/it][A

tensor(0.5928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:46<00:40, 20.31s/it][A

tensor(0.5951, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:06<00:20, 20.31s/it][A

tensor(0.6470, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:13<00:00, 20.40s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5883, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6297838113394129

	train acc: 0.6486856516976999

	training prec: 0.8526709942787868

	training rec: 0.6486856516976999

	training f1: 0.7068755671927202

	Current Learning rate:  4.2857142857142856e-05



  2%|▏         | 1/42 [00:02<01:44,  2.56s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.50s/it][A
  7%|▋         | 3/42 [00:07<01:38,  2.52s/it][A
 10%|▉         | 4/42 [00:10<01:36,  2.53s/it][A
 12%|█▏        | 5/42 [00:12<01:32,  2.51s/it][A
 14%|█▍        | 6/42 [00:15<01:30,  2.52s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.53s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.51s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.52s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.53s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.51s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.52s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.52s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.51s/it][A
 36%|███▌      | 15/42 [00:37<01:07,  2.51s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.52s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.51s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.52s/it][A
 45%|████▌     | 19/42 [00:47<00:58,  2.53s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation loss: 0.6214798958528609

	Validation acc: 0.5758928571428571

	Validation prec: 0.8711738145981686

	Validation rec: 0.5758928571428571

	Validation f1: 0.6490060364438472
loss: 


  1%|          | 1/83 [00:20<27:45, 20.31s/it][A

tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:28, 20.36s/it][A

tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:07, 20.34s/it][A

tensor(0.6318, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:21<26:46, 20.34s/it][A

tensor(0.6149, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:41<26:25, 20.33s/it][A

tensor(0.5921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:02<26:06, 20.35s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:22<25:45, 20.34s/it][A

tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:42<25:24, 20.33s/it][A

tensor(0.5706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:03<25:05, 20.35s/it][A

tensor(0.6143, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:23<24:43, 20.32s/it][A

tensor(0.5707, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:43<24:23, 20.32s/it][A

tensor(0.6541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:04<24:03, 20.33s/it][A

tensor(0.6071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:25<24:14, 20.78s/it][A

tensor(0.7310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:47<24:17, 21.13s/it][A

tensor(0.6249, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:08<23:52, 21.06s/it][A

tensor(0.6070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:29<23:30, 21.05s/it][A

tensor(0.5982, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:50<23:08, 21.04s/it][A

tensor(0.6970, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:11<22:45, 21.00s/it][A

tensor(0.7298, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:32<22:22, 20.97s/it][A

tensor(0.6188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:53<22:00, 20.97s/it][A

tensor(0.5520, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:14<21:38, 20.94s/it][A

tensor(0.6284, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:35<21:16, 20.92s/it][A

tensor(0.6844, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:56<20:53, 20.89s/it][A

tensor(0.6461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:16<20:32, 20.89s/it][A

tensor(0.6078, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:37<20:12, 20.91s/it][A

tensor(0.5785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:58<19:51, 20.91s/it][A

tensor(0.6200, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:19<19:30, 20.90s/it][A

tensor(0.6290, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:40<19:08, 20.88s/it][A

tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:01<18:47, 20.88s/it][A

tensor(0.5942, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:22<18:25, 20.85s/it][A

tensor(0.5838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:42<18:01, 20.79s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:03<17:42, 20.83s/it][A

tensor(0.5645, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:24<17:23, 20.88s/it][A

tensor(0.6242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:45<17:03, 20.88s/it][A

tensor(0.7457, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:06<16:41, 20.87s/it][A

tensor(0.6777, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:27<16:20, 20.87s/it][A

tensor(0.6878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:48<16:00, 20.88s/it][A

tensor(0.6628, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:09<15:39, 20.88s/it][A

tensor(0.5845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:30<15:19, 20.89s/it][A

tensor(0.5562, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:50<14:58, 20.89s/it][A

tensor(0.6261, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:11<14:39, 20.94s/it][A

tensor(0.5917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:32<14:17, 20.93s/it][A

tensor(0.6248, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:53<13:58, 20.95s/it][A

tensor(0.6617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:14<13:36, 20.93s/it][A

tensor(0.6979, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:35<13:11, 20.83s/it][A

tensor(0.6460, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:56<12:51, 20.86s/it][A

tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:17<12:31, 20.87s/it][A

tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:38<12:10, 20.86s/it][A

tensor(0.6288, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:58<11:47, 20.80s/it][A

tensor(0.5830, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:19<11:27, 20.84s/it][A

tensor(0.5964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:40<11:08, 20.89s/it][A

tensor(0.6107, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:01<10:47, 20.87s/it][A

tensor(0.7065, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:21<10:18, 20.63s/it][A

tensor(0.6792, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:42<09:58, 20.64s/it][A

tensor(0.6900, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:03<09:39, 20.71s/it][A

tensor(0.6080, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:23<09:20, 20.75s/it][A

tensor(0.6310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:44<09:00, 20.77s/it][A

tensor(0.5809, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:05<08:39, 20.80s/it][A

tensor(0.7022, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:26<08:19, 20.82s/it][A

tensor(0.7357, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:47<07:59, 20.83s/it][A

tensor(0.6435, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:08<07:38, 20.84s/it][A

tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:29<07:17, 20.85s/it][A

tensor(0.6772, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:49<06:56, 20.84s/it][A

tensor(0.6762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:10<06:35, 20.83s/it][A

tensor(0.5984, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:31<06:14, 20.81s/it][A

tensor(0.5771, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:52<05:53, 20.80s/it][A

tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:13<05:32, 20.81s/it][A

tensor(0.5660, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:33<05:12, 20.83s/it][A

tensor(0.6474, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:54<04:49, 20.65s/it][A

tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:14<04:29, 20.70s/it][A

tensor(0.6228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:35<04:07, 20.65s/it][A

tensor(0.6079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:56<03:47, 20.71s/it][A

tensor(0.5461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:17<03:27, 20.74s/it][A

tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:38<03:06, 20.77s/it][A

tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:58<02:46, 20.80s/it][A

tensor(0.6378, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:19<02:25, 20.81s/it][A

tensor(0.6160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:40<02:04, 20.81s/it][A

tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:01<01:44, 20.82s/it][A

tensor(0.6974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:22<01:23, 20.81s/it][A

tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:42<01:02, 20.82s/it][A

tensor(0.6472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:03<00:41, 20.82s/it][A

tensor(0.7203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:24<00:20, 20.82s/it][A

tensor(0.6059, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:31<00:00, 20.62s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6653, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6337880765099123

	train acc: 0.6537513691128148

	training prec: 0.8418000472059699

	training rec: 0.6537513691128148

	training f1: 0.7098956759603103

	Current Learning rate:  4.1428571428571437e-05



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.62s/it][A
  7%|▋         | 3/42 [00:07<01:40,  2.59s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.60s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.59s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.60s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.61s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.59s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.60s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.61s/it][A
 29%|██▊       | 12/42 [00:31<01:17,  2.60s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.60s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.61s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:46<01:02,  2.60s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6134282889820281

	Validation acc: 0.5788690476190477

	Validation prec: 0.8666669248930992

	Validation rec: 0.5788690476190477

	Validation f1: 0.6471408267919008
loss: 


  1%|          | 1/83 [00:20<27:47, 20.34s/it][A

tensor(0.5822, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:27, 20.34s/it][A

tensor(0.6431, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:06, 20.33s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:21<26:47, 20.35s/it][A

tensor(0.7184, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:41<26:27, 20.35s/it][A

tensor(0.5536, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:01<26:04, 20.32s/it][A

tensor(0.6507, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:22<25:43, 20.31s/it][A

tensor(0.5864, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:42<25:23, 20.31s/it][A

tensor(0.5954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:02<25:02, 20.31s/it][A

tensor(0.5918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:23<24:42, 20.30s/it][A

tensor(0.6169, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:43<24:20, 20.29s/it][A

tensor(0.6414, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:03<24:00, 20.28s/it][A

tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:23<23:39, 20.27s/it][A

tensor(0.5592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:44<23:18, 20.26s/it][A

tensor(0.5111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:04<22:57, 20.26s/it][A

tensor(0.6385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:24<22:38, 20.27s/it][A

tensor(0.7245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:45<22:18, 20.28s/it][A

tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:05<21:58, 20.29s/it][A

tensor(0.5806, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:25<21:38, 20.29s/it][A

tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:46<21:19, 20.32s/it][A

tensor(0.5586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:06<20:58, 20.30s/it][A

tensor(0.6482, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:26<20:37, 20.28s/it][A

tensor(0.6799, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:46<20:17, 20.30s/it][A

tensor(0.7697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:07<19:57, 20.29s/it][A

tensor(0.5971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:27<19:36, 20.28s/it][A

tensor(0.6445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:47<19:15, 20.28s/it][A

tensor(0.6313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:07<18:54, 20.27s/it][A

tensor(0.6246, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:28<18:34, 20.27s/it][A

tensor(0.6681, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:48<18:13, 20.26s/it][A

tensor(0.7127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:08<17:53, 20.26s/it][A

tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:28<17:32, 20.25s/it][A

tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:49<17:12, 20.25s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:09<16:52, 20.25s/it][A

tensor(0.6356, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:29<16:32, 20.26s/it][A

tensor(0.6013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:49<16:11, 20.25s/it][A

tensor(0.7053, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:10<15:53, 20.29s/it][A

tensor(0.7224, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:30<15:31, 20.26s/it][A

tensor(0.6148, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:50<15:11, 20.25s/it][A

tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:10<14:50, 20.24s/it][A

tensor(0.5732, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:31<14:31, 20.27s/it][A

tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:51<14:11, 20.26s/it][A

tensor(0.6142, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:11<13:51, 20.27s/it][A

tensor(0.6298, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:32<13:30, 20.26s/it][A

tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:52<13:10, 20.27s/it][A

tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:12<12:49, 20.26s/it][A

tensor(0.5907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:32<12:29, 20.27s/it][A

tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:53<12:09, 20.26s/it][A

tensor(0.6054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:13<11:49, 20.27s/it][A

tensor(0.6123, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:33<11:28, 20.26s/it][A

tensor(0.6444, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:53<11:08, 20.26s/it][A

tensor(0.6726, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:14<10:48, 20.27s/it][A

tensor(0.6661, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:34<10:29, 20.29s/it][A

tensor(0.6008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:54<10:08, 20.29s/it][A

tensor(0.7077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:15<09:48, 20.28s/it][A

tensor(0.6672, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:35<09:27, 20.27s/it][A

tensor(0.5732, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [18:55<09:07, 20.27s/it][A

tensor(0.6249, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:15<08:47, 20.27s/it][A

tensor(0.5315, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:36<08:26, 20.27s/it][A

tensor(0.6990, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [19:56<08:06, 20.27s/it][A

tensor(0.5907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:16<07:46, 20.26s/it][A

tensor(0.6296, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:36<07:25, 20.27s/it][A

tensor(0.5625, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [20:57<07:05, 20.26s/it][A

tensor(0.5930, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:17<06:45, 20.26s/it][A

tensor(0.6307, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:37<06:25, 20.27s/it][A

tensor(0.6851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [21:57<06:04, 20.26s/it][A

tensor(0.5778, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:18<05:44, 20.26s/it][A

tensor(0.6037, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:38<05:24, 20.28s/it][A

tensor(0.6278, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [22:58<05:03, 20.26s/it][A

tensor(0.5861, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:18<04:43, 20.25s/it][A

tensor(0.6515, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:39<04:23, 20.27s/it][A

tensor(0.5589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [23:59<04:03, 20.28s/it][A

tensor(0.6986, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:19<03:43, 20.28s/it][A

tensor(0.6331, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:40<03:22, 20.28s/it][A

tensor(0.5826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:00<03:02, 20.27s/it][A

tensor(0.5895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:20<02:42, 20.27s/it][A

tensor(0.5181, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:40<02:21, 20.26s/it][A

tensor(0.7095, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:01<02:01, 20.26s/it][A

tensor(0.5790, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:21<01:41, 20.26s/it][A

tensor(0.5657, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:41<01:21, 20.27s/it][A

tensor(0.6932, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:01<01:00, 20.25s/it][A

tensor(0.6607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:22<00:40, 20.26s/it][A

tensor(0.6420, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:42<00:20, 20.25s/it][A

tensor(0.5977, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:49<00:00, 20.11s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6433, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.627562793622534

	train acc: 0.6376300657174151

	training prec: 0.8457409910688792

	training rec: 0.6376300657174151

	training f1: 0.6978180857400053

	Current Learning rate:  4e-05



  2%|▏         | 1/42 [00:02<01:42,  2.49s/it][A
  5%|▍         | 2/42 [00:05<01:41,  2.54s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.55s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.53s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.54s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.52s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.54s/it][A
 21%|██▏       | 9/42 [00:22<01:24,  2.55s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.53s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.54s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.55s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.53s/it][A
 33%|███▎      | 14/42 [00:35<01:11,  2.54s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.55s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.53s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.54s/it][A
 43%|████▎     | 18/42 [00:45<01:01,  2.55s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.53s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation loss: 0.6026668981427238

	Validation acc: 0.6138392857142857

	Validation prec: 0.8682015007873274

	Validation rec: 0.6138392857142857

	Validation f1: 0.6800168996845141
loss: 


  1%|          | 1/83 [00:20<27:46, 20.33s/it][A

tensor(0.5953, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:26, 20.33s/it][A

tensor(0.5726, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:00<27:05, 20.32s/it][A

tensor(0.7005, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:21<26:44, 20.31s/it][A

tensor(0.6450, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:41<26:23, 20.31s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:01<26:03, 20.31s/it][A

tensor(0.6372, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:22<25:41, 20.28s/it][A

tensor(0.5752, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:42<25:20, 20.27s/it][A

tensor(0.5946, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:02<24:59, 20.27s/it][A

tensor(0.6667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:22<24:38, 20.25s/it][A

tensor(0.5640, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:43<24:18, 20.26s/it][A

tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:03<23:57, 20.25s/it][A

tensor(0.6143, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:23<23:37, 20.25s/it][A

tensor(0.5910, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:43<23:17, 20.25s/it][A

tensor(0.7058, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:04<22:58, 20.27s/it][A

tensor(0.6582, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:24<22:37, 20.26s/it][A

tensor(0.5259, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:44<22:16, 20.25s/it][A

tensor(0.6020, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:05<21:59, 20.30s/it][A

tensor(0.5119, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:25<21:34, 20.22s/it][A

tensor(0.5826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:45<21:24, 20.39s/it][A

tensor(0.6727, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:06<21:12, 20.52s/it][A

tensor(0.6163, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:27<20:57, 20.61s/it][A

tensor(0.6964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:48<20:39, 20.66s/it][A

tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:08<20:07, 20.47s/it][A

tensor(0.6112, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:28<19:36, 20.29s/it][A

tensor(0.6869, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:47<19:07, 20.13s/it][A

tensor(0.5979, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:07<18:42, 20.04s/it][A

tensor(0.5681, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:27<18:18, 19.97s/it][A

tensor(0.6250, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:47<17:56, 19.93s/it][A

tensor(0.6571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:07<17:35, 19.92s/it][A

tensor(0.5993, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:27<17:14, 19.89s/it][A

tensor(0.7428, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:46<16:52, 19.85s/it][A

tensor(0.5627, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:06<16:31, 19.84s/it][A

tensor(0.6735, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:26<16:11, 19.84s/it][A

tensor(0.6790, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:46<15:52, 19.84s/it][A

tensor(0.6641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:06<15:32, 19.84s/it][A

tensor(0.6660, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:26<15:12, 19.84s/it][A

tensor(0.6276, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:45<14:52, 19.84s/it][A

tensor(0.6187, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:05<14:33, 19.85s/it][A

tensor(0.5581, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:25<14:14, 19.87s/it][A

tensor(0.5650, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:45<13:55, 19.89s/it][A

tensor(0.6645, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:05<13:35, 19.90s/it][A

tensor(0.6248, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:25<13:16, 19.90s/it][A

tensor(0.6675, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:45<12:56, 19.91s/it][A

tensor(0.5970, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:05<12:36, 19.91s/it][A

tensor(0.6120, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:25<12:18, 19.96s/it][A

tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:45<11:59, 19.98s/it][A

tensor(0.5737, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:05<11:39, 19.99s/it][A

tensor(0.7546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:25<11:19, 19.99s/it][A

tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:45<10:59, 19.98s/it][A

tensor(0.6904, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:05<10:39, 19.98s/it][A

tensor(0.5998, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:25<10:20, 20.00s/it][A

tensor(0.5846, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:45<09:59, 19.99s/it][A

tensor(0.5561, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:05<09:40, 20.01s/it][A

tensor(0.6175, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:25<09:20, 20.01s/it][A

tensor(0.6232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [18:45<09:00, 20.01s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:05<08:39, 20.00s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:25<08:20, 20.02s/it][A

tensor(0.6852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [19:45<08:00, 20.01s/it][A

tensor(0.5796, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:05<07:40, 20.02s/it][A

tensor(0.5629, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:25<07:20, 20.03s/it][A

tensor(0.6546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [20:45<07:00, 20.02s/it][A

tensor(0.6021, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:05<06:39, 19.99s/it][A

tensor(0.5538, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:25<06:20, 20.00s/it][A

tensor(0.5961, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [21:45<06:00, 20.01s/it][A

tensor(0.5842, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:05<05:40, 20.04s/it][A

tensor(0.5808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:25<05:20, 20.04s/it][A

tensor(0.6856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [22:45<05:00, 20.04s/it][A

tensor(0.5845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:05<04:40, 20.04s/it][A

tensor(0.5662, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:25<04:20, 20.05s/it][A

tensor(0.5945, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [23:45<04:00, 20.04s/it][A

tensor(0.5715, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:05<03:40, 20.05s/it][A

tensor(0.6544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:25<03:20, 20.05s/it][A

tensor(0.6970, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [24:46<03:00, 20.05s/it][A

tensor(0.6050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:06<02:40, 20.05s/it][A

tensor(0.5514, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:26<02:20, 20.06s/it][A

tensor(0.6249, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [25:46<02:00, 20.07s/it][A

tensor(0.6098, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:06<01:40, 20.07s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:26<01:20, 20.06s/it][A

tensor(0.6731, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [26:46<01:00, 20.05s/it][A

tensor(0.5985, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:06<00:40, 20.04s/it][A

tensor(0.5925, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:26<00:19, 19.99s/it][A

tensor(0.7012, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:33<00:00, 19.92s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5624, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6211519327508398

	train acc: 0.6578244797371303

	training prec: 0.850899435924343

	training rec: 0.6578244797371303

	training f1: 0.7157252717538092

	Current Learning rate:  3.857142857142858e-05



  2%|▏         | 1/42 [00:02<01:41,  2.48s/it][A
  5%|▍         | 2/42 [00:04<01:37,  2.43s/it][A
  7%|▋         | 3/42 [00:07<01:35,  2.45s/it][A
 10%|▉         | 4/42 [00:09<01:33,  2.46s/it][A
 12%|█▏        | 5/42 [00:12<01:30,  2.44s/it][A
 14%|█▍        | 6/42 [00:14<01:28,  2.45s/it][A
 17%|█▋        | 7/42 [00:17<01:25,  2.45s/it][A
 19%|█▉        | 8/42 [00:19<01:22,  2.44s/it][A
 21%|██▏       | 9/42 [00:22<01:20,  2.45s/it][A
 24%|██▍       | 10/42 [00:24<01:18,  2.46s/it][A
 26%|██▌       | 11/42 [00:26<01:15,  2.44s/it][A
 29%|██▊       | 12/42 [00:29<01:13,  2.44s/it][A
 31%|███       | 13/42 [00:31<01:11,  2.46s/it][A
 33%|███▎      | 14/42 [00:34<01:08,  2.44s/it][A
 36%|███▌      | 15/42 [00:36<01:06,  2.45s/it][A
 38%|███▊      | 16/42 [00:39<01:03,  2.45s/it][A
 40%|████      | 17/42 [00:41<01:00,  2.44s/it][A
 43%|████▎     | 18/42 [00:44<00:58,  2.45s/it][A
 45%|████▌     | 19/42 [00:46<00:56,  2.45s/it][A
 48%|████▊     | 20/42 [00:48<00:53,  2


	Validation loss: 0.5945524459793454

	Validation acc: 0.6912202380952381

	Validation prec: 0.8577612936493056

	Validation rec: 0.6912202380952381

	Validation f1: 0.7433883899678598
loss: 


  1%|          | 1/83 [00:19<27:10, 19.88s/it][A

tensor(0.5666, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:39<26:49, 19.87s/it][A

tensor(0.6013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [00:59<26:32, 19.91s/it][A

tensor(0.5357, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:19<26:12, 19.91s/it][A

tensor(0.5346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:39<25:51, 19.90s/it][A

tensor(0.6363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [01:59<25:31, 19.89s/it][A

tensor(0.6286, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:19<25:11, 19.89s/it][A

tensor(0.7216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:39<24:52, 19.90s/it][A

tensor(0.6651, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [02:59<24:32, 19.90s/it][A

tensor(0.6644, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:18<24:11, 19.89s/it][A

tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:38<23:51, 19.88s/it][A

tensor(0.6553, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [03:58<23:31, 19.87s/it][A

tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:18<23:11, 19.88s/it][A

tensor(0.5513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:38<22:54, 19.91s/it][A

tensor(0.6182, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [04:58<22:37, 19.96s/it][A

tensor(0.5864, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:18<22:19, 19.99s/it][A

tensor(0.5779, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:38<22:00, 20.01s/it][A

tensor(0.6479, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [05:58<21:41, 20.02s/it][A

tensor(0.5954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:18<21:21, 20.03s/it][A

tensor(0.6772, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:38<21:02, 20.04s/it][A

tensor(0.6665, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [06:59<20:44, 20.07s/it][A

tensor(0.6573, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:19<20:24, 20.08s/it][A

tensor(0.6091, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:39<20:04, 20.08s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [07:59<19:45, 20.10s/it][A

tensor(0.6006, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:19<19:26, 20.11s/it][A

tensor(0.6342, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:39<19:05, 20.10s/it][A

tensor(0.7283, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [08:59<18:45, 20.09s/it][A

tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:19<18:24, 20.09s/it][A

tensor(0.6139, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:39<18:04, 20.07s/it][A

tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [09:59<17:43, 20.07s/it][A

tensor(0.6399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:19<17:22, 20.06s/it][A

tensor(0.5764, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:39<17:02, 20.05s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [10:59<16:42, 20.05s/it][A

tensor(0.6274, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:20<16:22, 20.06s/it][A

tensor(0.5531, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:40<16:02, 20.06s/it][A

tensor(0.5702, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:00<15:42, 20.05s/it][A

tensor(0.6049, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:20<15:22, 20.06s/it][A

tensor(0.6447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:40<15:02, 20.05s/it][A

tensor(0.6102, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:00<14:44, 20.11s/it][A

tensor(0.6750, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:20<14:24, 20.10s/it][A

tensor(0.6701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:40<14:05, 20.12s/it][A

tensor(0.5997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:00<13:44, 20.11s/it][A

tensor(0.6804, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:20<13:24, 20.10s/it][A

tensor(0.6572, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:40<13:04, 20.11s/it][A

tensor(0.6718, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:01<12:43, 20.09s/it][A

tensor(0.6159, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:21<12:23, 20.10s/it][A

tensor(0.6513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:41<12:04, 20.13s/it][A

tensor(0.6293, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:01<11:44, 20.12s/it][A

tensor(0.5947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:21<11:24, 20.12s/it][A

tensor(0.5635, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:41<11:04, 20.13s/it][A

tensor(0.6262, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:01<10:44, 20.14s/it][A

tensor(0.6179, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:22<10:24, 20.15s/it][A

tensor(0.6167, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:42<10:04, 20.15s/it][A

tensor(0.5671, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:02<09:44, 20.16s/it][A

tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:22<09:25, 20.18s/it][A

tensor(0.7260, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [18:42<09:05, 20.21s/it][A

tensor(0.5774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:03<08:45, 20.23s/it][A

tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:23<08:24, 20.18s/it][A

tensor(0.5987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [19:43<08:03, 20.16s/it][A

tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:03<07:43, 20.15s/it][A

tensor(0.6034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:23<07:23, 20.16s/it][A

tensor(0.5814, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [20:43<07:03, 20.15s/it][A

tensor(0.5573, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:03<06:43, 20.16s/it][A

tensor(0.6087, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:24<06:22, 20.14s/it][A

tensor(0.5882, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [21:44<06:02, 20.14s/it][A

tensor(0.7040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:04<05:42, 20.13s/it][A

tensor(0.7384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:24<05:22, 20.14s/it][A

tensor(0.6731, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [22:44<05:02, 20.14s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:04<04:42, 20.16s/it][A

tensor(0.6797, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:25<04:22, 20.17s/it][A

tensor(0.5969, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [23:45<04:01, 20.16s/it][A

tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:05<03:41, 20.17s/it][A

tensor(0.4819, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:25<03:21, 20.15s/it][A

tensor(0.6749, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [24:45<03:01, 20.16s/it][A

tensor(0.5273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:05<02:41, 20.14s/it][A

tensor(0.7047, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:25<02:20, 20.14s/it][A

tensor(0.5809, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [25:46<02:00, 20.15s/it][A

tensor(0.6638, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:06<01:40, 20.17s/it][A

tensor(0.5743, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:26<01:20, 20.17s/it][A

tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [26:46<01:00, 20.17s/it][A

tensor(0.5984, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:06<00:40, 20.18s/it][A

tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:26<00:20, 20.17s/it][A

tensor(0.5613, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:33<00:00, 19.93s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7436, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6243044400071524

	train acc: 0.6462383625410734

	training prec: 0.8496689713509528

	training rec: 0.6462383625410734

	training f1: 0.7076258995722622

	Current Learning rate:  3.7142857142857143e-05



  2%|▏         | 1/42 [00:02<01:42,  2.50s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.50s/it][A
  7%|▋         | 3/42 [00:07<01:36,  2.47s/it][A
 10%|▉         | 4/42 [00:09<01:34,  2.48s/it][A
 12%|█▏        | 5/42 [00:12<01:31,  2.49s/it][A
 14%|█▍        | 6/42 [00:14<01:28,  2.47s/it][A
 17%|█▋        | 7/42 [00:17<01:26,  2.48s/it][A
 19%|█▉        | 8/42 [00:19<01:24,  2.48s/it][A
 21%|██▏       | 9/42 [00:22<01:21,  2.47s/it][A
 24%|██▍       | 10/42 [00:24<01:19,  2.48s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.49s/it][A
 29%|██▊       | 12/42 [00:29<01:14,  2.47s/it][A
 31%|███       | 13/42 [00:32<01:12,  2.49s/it][A
 33%|███▎      | 14/42 [00:34<01:09,  2.49s/it][A
 36%|███▌      | 15/42 [00:37<01:06,  2.47s/it][A
 38%|███▊      | 16/42 [00:39<01:04,  2.49s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.49s/it][A
 43%|████▎     | 18/42 [00:44<00:59,  2.48s/it][A
 45%|████▌     | 19/42 [00:47<00:57,  2.48s/it][A
 48%|████▊     | 20/42 [00:49<00:54,  2


	Validation loss: 0.6007650196552277

	Validation acc: 0.6121031746031745

	Validation prec: 0.859521401967571

	Validation rec: 0.6121031746031745

	Validation f1: 0.6779572909927337
loss: 


  1%|          | 1/83 [00:20<27:21, 20.01s/it][A

tensor(0.5788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:01, 20.01s/it][A

tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [00:59<26:37, 19.97s/it][A

tensor(0.7299, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:19<26:18, 19.98s/it][A

tensor(0.5763, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:39<25:56, 19.96s/it][A

tensor(0.6311, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [01:59<25:37, 19.96s/it][A

tensor(0.5457, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:19<25:15, 19.95s/it][A

tensor(0.6005, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:39<24:53, 19.92s/it][A

tensor(0.6185, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [02:59<24:40, 20.00s/it][A

tensor(0.6247, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:21<24:53, 20.46s/it][A

tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:41<24:24, 20.34s/it][A

tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:01<23:58, 20.26s/it][A

tensor(0.5967, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:21<23:33, 20.20s/it][A

tensor(0.5580, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:41<23:11, 20.17s/it][A

tensor(0.6454, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:01<22:51, 20.17s/it][A

tensor(0.5647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:21<22:29, 20.14s/it][A

tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:42<22:10, 20.16s/it][A

tensor(0.5966, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:02<22:00, 20.31s/it][A

tensor(0.6262, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:23<21:43, 20.37s/it][A

tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:43<21:27, 20.44s/it][A

tensor(0.7085, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:04<21:06, 20.43s/it][A

tensor(0.5102, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:24<20:47, 20.45s/it][A

tensor(0.6173, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:45<20:26, 20.44s/it][A

tensor(0.5559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:05<20:06, 20.45s/it][A

tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:26<19:46, 20.46s/it][A

tensor(0.6907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:46<19:25, 20.45s/it][A

tensor(0.5859, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:06<19:04, 20.43s/it][A

tensor(0.6159, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:27<18:42, 20.42s/it][A

tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:47<18:22, 20.42s/it][A

tensor(0.5736, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:08<18:01, 20.40s/it][A

tensor(0.5774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:28<17:40, 20.40s/it][A

tensor(0.5341, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:48<17:20, 20.41s/it][A

tensor(0.6427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:09<17:00, 20.41s/it][A

tensor(0.6385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:29<16:39, 20.40s/it][A

tensor(0.6379, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:50<16:18, 20.39s/it][A

tensor(0.5629, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:10<15:59, 20.41s/it][A

tensor(0.6342, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:30<15:38, 20.41s/it][A

tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:51<15:18, 20.41s/it][A

tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:11<14:57, 20.40s/it][A

tensor(0.6799, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:32<14:37, 20.40s/it][A

tensor(0.5557, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:52<14:16, 20.38s/it][A

tensor(0.5081, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:12<13:55, 20.39s/it][A

tensor(0.7294, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:33<13:35, 20.39s/it][A

tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:53<13:15, 20.39s/it][A

tensor(0.5876, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:13<12:54, 20.38s/it][A

tensor(0.6316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:34<12:34, 20.39s/it][A

tensor(0.5876, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:54<12:13, 20.37s/it][A

tensor(0.5746, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:15<11:52, 20.37s/it][A

tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:35<11:32, 20.36s/it][A

tensor(0.5865, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:55<11:12, 20.37s/it][A

tensor(0.5218, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:16<10:52, 20.39s/it][A

tensor(0.6251, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:36<10:31, 20.37s/it][A

tensor(0.6476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:56<10:11, 20.38s/it][A

tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:17<09:50, 20.37s/it][A

tensor(0.6746, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:37<09:30, 20.37s/it][A

tensor(0.5236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [18:57<09:09, 20.35s/it][A

tensor(0.6330, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:18<08:49, 20.35s/it][A

tensor(0.5666, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:38<08:28, 20.34s/it][A

tensor(0.7030, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [19:59<08:08, 20.36s/it][A

tensor(0.5014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:19<07:48, 20.35s/it][A

tensor(0.5699, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:39<07:27, 20.36s/it][A

tensor(0.5857, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:00<07:07, 20.36s/it][A

tensor(0.6444, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:20<06:47, 20.37s/it][A

tensor(0.6191, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:40<06:26, 20.35s/it][A

tensor(0.5955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:01<06:06, 20.37s/it][A

tensor(0.6977, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:21<05:45, 20.35s/it][A

tensor(0.6329, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:41<05:25, 20.36s/it][A

tensor(0.6072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:02<05:05, 20.35s/it][A

tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:22<04:44, 20.35s/it][A

tensor(0.6263, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:42<04:24, 20.36s/it][A

tensor(0.6182, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:03<04:05, 20.49s/it][A

tensor(0.5624, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:24<03:46, 20.59s/it][A

tensor(0.6041, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:45<03:26, 20.67s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:06<03:06, 20.73s/it][A

tensor(0.6322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:25<02:42, 20.29s/it][A

tensor(0.5358, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:44<02:19, 19.98s/it][A

tensor(0.6616, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:05<02:00, 20.09s/it][A

tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:24<01:39, 19.90s/it][A

tensor(0.6487, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:45<01:20, 20.21s/it][A

tensor(0.5890, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:06<01:01, 20.39s/it][A

tensor(0.6131, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:27<00:41, 20.55s/it][A

tensor(0.5610, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:48<00:20, 20.63s/it][A

tensor(0.6106, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:55<00:00, 20.18s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6081, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6115487246628267

	train acc: 0.647573247535597

	training prec: 0.8494370474148025

	training rec: 0.647573247535597

	training f1: 0.7076956203502469

	Current Learning rate:  3.571428571428572e-05



  2%|▏         | 1/42 [00:02<01:46,  2.59s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.60s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.61s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.62s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.61s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.61s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.60s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.60s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.596315283860479

	Validation acc: 0.6341765873015873

	Validation prec: 0.8579393446064374

	Validation rec: 0.6341765873015873

	Validation f1: 0.6978893905645176
loss: 


  1%|          | 1/83 [00:20<28:38, 20.96s/it][A

tensor(0.5736, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:19, 20.98s/it][A

tensor(0.5889, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:45, 20.82s/it][A

tensor(0.6455, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:25, 20.83s/it][A

tensor(0.5619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:41, 20.53s/it][A

tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:32, 20.68s/it][A

tensor(0.6324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:20, 20.79s/it][A

tensor(0.5795, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:02, 20.83s/it][A

tensor(0.6258, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:47, 20.91s/it][A

tensor(0.5527, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:30, 20.96s/it][A

tensor(0.7650, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:09, 20.96s/it][A

tensor(0.7252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:47, 20.96s/it][A

tensor(0.6494, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:28, 20.98s/it][A

tensor(0.6273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<24:08, 21.00s/it][A

tensor(0.5866, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:48, 21.01s/it][A

tensor(0.5586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:26, 20.99s/it][A

tensor(0.7484, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<23:04, 20.98s/it][A

tensor(0.5531, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:42, 20.96s/it][A

tensor(0.8879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:21, 20.96s/it][A

tensor(0.5341, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<22:01, 20.98s/it][A

tensor(0.6432, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:17<21:09, 20.47s/it][A

tensor(0.6282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:36<20:26, 20.11s/it][A

tensor(0.5978, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:56<19:53, 19.89s/it][A

tensor(0.6263, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:17<19:52, 20.20s/it][A

tensor(0.5853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:38<19:44, 20.42s/it][A

tensor(0.5898, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:59<19:34, 20.60s/it][A

tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:20<19:21, 20.74s/it][A

tensor(0.6806, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:41<19:05, 20.83s/it][A

tensor(0.5282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:02<18:46, 20.87s/it][A

tensor(0.5804, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:23<18:29, 20.94s/it][A

tensor(0.6711, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:44<18:08, 20.93s/it][A

tensor(0.5816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:05<17:46, 20.92s/it][A

tensor(0.6042, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:25<17:25, 20.90s/it][A

tensor(0.5490, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:46<17:03, 20.89s/it][A

tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:07<16:43, 20.92s/it][A

tensor(0.5257, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:28<16:23, 20.91s/it][A

tensor(0.6509, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:49<16:03, 20.94s/it][A

tensor(0.5231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:10<15:41, 20.92s/it][A

tensor(0.6636, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:31<15:20, 20.91s/it][A

tensor(0.6274, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:52<14:56, 20.86s/it][A

tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:13<14:37, 20.89s/it][A

tensor(0.6436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:34<14:17, 20.92s/it][A

tensor(0.5877, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:55<13:56, 20.92s/it][A

tensor(0.6243, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:15<13:29, 20.76s/it][A

tensor(0.6854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:35<13:04, 20.65s/it][A

tensor(0.6055, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:56<12:40, 20.55s/it][A

tensor(0.5690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:16<12:16, 20.47s/it][A

tensor(0.5577, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:37<12:00, 20.59s/it][A

tensor(0.6084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:58<11:43, 20.69s/it][A

tensor(0.6531, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:19<11:24, 20.76s/it][A

tensor(0.5760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:40<11:05, 20.80s/it][A

tensor(0.6037, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:00<10:45, 20.82s/it][A

tensor(0.6024, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:21<10:25, 20.84s/it][A

tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:42<10:05, 20.86s/it][A

tensor(0.5867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:03<09:43, 20.84s/it][A

tensor(0.6220, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:24<09:23, 20.86s/it][A

tensor(0.6442, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:45<09:01, 20.84s/it][A

tensor(0.5781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:06<08:41, 20.85s/it][A

tensor(0.5206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:26<08:20, 20.84s/it][A

tensor(0.5878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:46<07:49, 20.43s/it][A

tensor(0.6426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:07<07:32, 20.56s/it][A

tensor(0.6142, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:27<07:09, 20.46s/it][A

tensor(0.6432, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:47<06:47, 20.36s/it][A

tensor(0.5452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:07<06:25, 20.31s/it][A

tensor(0.6074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:28<06:06, 20.34s/it][A

tensor(0.5518, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:54<06:15, 22.06s/it][A

tensor(0.6288, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:16<05:54, 22.15s/it][A

tensor(0.8344, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:37<05:24, 21.62s/it][A

tensor(0.5705, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:57<04:57, 21.25s/it][A

tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:17<04:33, 21.03s/it][A

tensor(0.6160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:38<04:10, 20.89s/it][A

tensor(0.5693, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:59<03:48, 20.79s/it][A

tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:19<03:27, 20.74s/it][A

tensor(0.6817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:40<03:06, 20.72s/it][A

tensor(0.6307, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:01<02:45, 20.72s/it][A

tensor(0.5787, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:21<02:25, 20.75s/it][A

tensor(0.6495, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:42<02:04, 20.79s/it][A

tensor(0.6792, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:03<01:43, 20.78s/it][A

tensor(0.5730, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:24<01:23, 20.77s/it][A

tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:45<01:02, 20.77s/it][A

tensor(0.6595, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:05<00:41, 20.75s/it][A

tensor(0.6246, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:26<00:20, 20.77s/it][A

tensor(0.5517, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:33<00:00, 20.65s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6134, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6187599351607174

	train acc: 0.6360898138006572

	training prec: 0.8501354434100925

	training rec: 0.6360898138006572

	training f1: 0.6977017689032236

	Current Learning rate:  3.428571428571429e-05



  2%|▏         | 1/42 [00:02<01:48,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:43,  2.59s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.60s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.61s/it][A
 12%|█▏        | 5/42 [00:12<01:35,  2.59s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.59s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.60s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.59s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.60s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.61s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.59s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.60s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:12,  2.60s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.60s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.60s/it][A
 43%|████▎     | 18/42 [00:46<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.5974048525094986

	Validation acc: 0.6525297619047619

	Validation prec: 0.8482512371686096

	Validation rec: 0.6525297619047619

	Validation f1: 0.7138108980536312
loss: 


  1%|          | 1/83 [00:20<28:23, 20.78s/it][A

tensor(0.6028, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:05, 20.81s/it][A

tensor(0.6098, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:43, 20.79s/it][A

tensor(0.5936, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:21, 20.78s/it][A

tensor(0.6580, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<27:00, 20.78s/it][A

tensor(0.5811, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:41, 20.80s/it][A

tensor(0.6315, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:20, 20.80s/it][A

tensor(0.6612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:02, 20.83s/it][A

tensor(0.5530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:41, 20.83s/it][A

tensor(0.6023, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:20, 20.83s/it][A

tensor(0.5777, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:48<24:56, 20.79s/it][A

tensor(0.6669, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:09<24:37, 20.81s/it][A

tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:30<24:14, 20.78s/it][A

tensor(0.6386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:51<23:53, 20.77s/it][A

tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:11<23:32, 20.77s/it][A

tensor(0.6579, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:32<23:11, 20.78s/it][A

tensor(0.5318, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:53<22:49, 20.76s/it][A

tensor(0.5651, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:14<22:29, 20.76s/it][A

tensor(0.5978, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:34<22:09, 20.77s/it][A

tensor(0.7238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:55<21:49, 20.78s/it][A

tensor(0.5676, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:16<21:29, 20.79s/it][A

tensor(0.6509, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:37<21:09, 20.81s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:58<20:47, 20.79s/it][A

tensor(0.6502, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:19<20:27, 20.81s/it][A

tensor(0.6187, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:39<20:08, 20.83s/it][A

tensor(0.6507, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:00<19:47, 20.84s/it][A

tensor(0.5673, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:21<19:26, 20.83s/it][A

tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:42<19:04, 20.81s/it][A

tensor(0.6128, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:03<18:46, 20.85s/it][A

tensor(0.5900, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:24<18:24, 20.84s/it][A

tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:44<18:02, 20.83s/it][A

tensor(0.6134, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:05<17:40, 20.79s/it][A

tensor(0.5956, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:26<17:19, 20.79s/it][A

tensor(0.6743, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:47<16:59, 20.81s/it][A

tensor(0.7396, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:08<16:39, 20.82s/it][A

tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:28<16:18, 20.83s/it][A

tensor(0.5249, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:49<15:57, 20.81s/it][A

tensor(0.5266, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:10<15:36, 20.80s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:32<15:25, 21.02s/it][A

tensor(0.6424, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:53<15:10, 21.17s/it][A

tensor(0.6713, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:15<14:54, 21.29s/it][A

tensor(0.6411, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:36<14:35, 21.36s/it][A

tensor(0.6030, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:58<14:15, 21.40s/it][A

tensor(0.5838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:19<13:55, 21.43s/it][A

tensor(0.6253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:41<13:34, 21.44s/it][A

tensor(0.6426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:02<13:13, 21.45s/it][A

tensor(0.5595, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:23<12:48, 21.35s/it][A

tensor(0.6407, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:44<12:21, 21.18s/it][A

tensor(0.6103, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:05<11:55, 21.04s/it][A

tensor(0.7318, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:25<11:31, 20.95s/it][A

tensor(0.5745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:46<11:08, 20.88s/it][A

tensor(0.6367, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:07<10:45, 20.82s/it][A

tensor(0.5827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:28<10:24, 20.81s/it][A

tensor(0.5226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:48<10:02, 20.77s/it][A

tensor(0.6182, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:09<09:41, 20.76s/it][A

tensor(0.5897, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:30<09:20, 20.76s/it][A

tensor(0.5991, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:50<08:59, 20.74s/it][A

tensor(0.5947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:11<08:37, 20.72s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:32<08:17, 20.73s/it][A

tensor(0.5410, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:53<07:56, 20.72s/it][A

tensor(0.5592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:13<07:35, 20.71s/it][A

tensor(0.6153, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:34<07:15, 20.72s/it][A

tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:55<06:54, 20.73s/it][A

tensor(0.5873, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:16<06:34, 20.74s/it][A

tensor(0.5902, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:36<06:13, 20.74s/it][A

tensor(0.5400, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:57<05:52, 20.73s/it][A

tensor(0.6645, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:18<05:31, 20.75s/it][A

tensor(0.6863, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:38<05:11, 20.74s/it][A

tensor(0.5839, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:59<04:50, 20.76s/it][A

tensor(0.6142, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:21<04:31, 20.91s/it][A

tensor(0.7365, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:40<04:06, 20.58s/it][A

tensor(0.5952, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:01<03:47, 20.72s/it][A

tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:22<03:27, 20.78s/it][A

tensor(0.5879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:43<03:07, 20.85s/it][A

tensor(0.5932, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:04<02:47, 20.91s/it][A

tensor(0.6746, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:25<02:26, 20.92s/it][A

tensor(0.5649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:46<02:05, 20.94s/it][A

tensor(0.6099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:07<01:44, 20.97s/it][A

tensor(0.6380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:28<01:23, 20.99s/it][A

tensor(0.5971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:49<01:02, 20.98s/it][A

tensor(0.6742, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:10<00:41, 20.99s/it][A

tensor(0.6140, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:31<00:20, 21.00s/it][A

tensor(0.5738, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:39<00:00, 20.71s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5142, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6137948625059013

	train acc: 0.66088786966046

	training prec: 0.8467538608933998

	training rec: 0.66088786966046

	training f1: 0.7171479352377395

	Current Learning rate:  3.285714285714286e-05



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.67s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.67s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:28<01:19,  2.56s/it][A
 29%|██▊       | 12/42 [00:31<01:15,  2.50s/it][A
 31%|███       | 13/42 [00:33<01:13,  2.54s/it][A
 33%|███▎      | 14/42 [00:36<01:12,  2.58s/it][A
 36%|███▌      | 15/42 [00:38<01:07,  2.52s/it][A
 38%|███▊      | 16/42 [00:41<01:04,  2.48s/it][A
 40%|████      | 17/42 [00:43<01:01,  2.46s/it][A
 43%|████▎     | 18/42 [00:46<00:58,  2.43s/it][A
 45%|████▌     | 19/42 [00:48<00:56,  2.44s/it][A
 48%|████▊     | 20/42 [00:51<00:55,  2


	Validation loss: 0.5998541294109254

	Validation acc: 0.5833333333333334

	Validation prec: 0.8690452509842513

	Validation rec: 0.5833333333333334

	Validation f1: 0.6547314368988405
loss: 


  1%|          | 1/83 [00:20<27:26, 20.08s/it][A

tensor(0.5498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:05, 20.07s/it][A

tensor(0.5915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:00<26:45, 20.07s/it][A

tensor(0.5499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:20<26:27, 20.09s/it][A

tensor(0.5426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:40<26:05, 20.07s/it][A

tensor(0.5758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:00<25:47, 20.10s/it][A

tensor(0.6675, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:20<25:29, 20.12s/it][A

tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:40<25:07, 20.09s/it][A

tensor(0.6356, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:00<24:47, 20.10s/it][A

tensor(0.6358, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:21<24:28, 20.12s/it][A

tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:41<24:07, 20.11s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:01<23:48, 20.12s/it][A

tensor(0.6302, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:22<23:47, 20.39s/it][A

tensor(0.5792, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:43<23:39, 20.57s/it][A

tensor(0.6914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:03<23:11, 20.46s/it][A

tensor(0.6254, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:23<22:45, 20.38s/it][A

tensor(0.5987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:43<22:23, 20.36s/it][A

tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:04<22:02, 20.35s/it][A

tensor(0.6927, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:24<21:43, 20.37s/it][A

tensor(0.5531, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:45<21:24, 20.40s/it][A

tensor(0.5953, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:05<21:02, 20.37s/it][A

tensor(0.6807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:25<20:40, 20.34s/it][A

tensor(0.6758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:46<20:21, 20.36s/it][A

tensor(0.5542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:06<20:00, 20.35s/it][A

tensor(0.5870, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:26<19:40, 20.36s/it][A

tensor(0.6665, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:47<19:20, 20.37s/it][A

tensor(0.5647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:07<18:59, 20.36s/it][A

tensor(0.6310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:27<18:38, 20.34s/it][A

tensor(0.5623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:48<18:18, 20.34s/it][A

tensor(0.6656, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:08<17:58, 20.36s/it][A

tensor(0.5554, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:28<17:38, 20.36s/it][A

tensor(0.5508, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:49<17:18, 20.36s/it][A

tensor(0.6017, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:09<16:56, 20.32s/it][A

tensor(0.5991, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:29<16:35, 20.32s/it][A

tensor(0.6016, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:50<16:14, 20.30s/it][A

tensor(0.5785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:10<15:54, 20.30s/it][A

tensor(0.6341, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:30<15:32, 20.27s/it][A

tensor(0.6892, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:50<15:12, 20.27s/it][A

tensor(0.5533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:11<14:51, 20.26s/it][A

tensor(0.6607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:31<14:31, 20.26s/it][A

tensor(0.5876, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:51<14:09, 20.23s/it][A

tensor(0.5741, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:11<13:49, 20.24s/it][A

tensor(0.6548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:31<13:28, 20.22s/it][A

tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:52<13:08, 20.22s/it][A

tensor(0.6030, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:12<12:47, 20.21s/it][A

tensor(0.6206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:32<12:27, 20.20s/it][A

tensor(0.6900, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:52<12:07, 20.20s/it][A

tensor(0.5843, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:13<11:47, 20.22s/it][A

tensor(0.6629, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:33<11:27, 20.22s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:53<11:07, 20.23s/it][A

tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:13<10:47, 20.24s/it][A

tensor(0.5939, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:33<10:26, 20.22s/it][A

tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:53<10:03, 20.12s/it][A

tensor(0.5840, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:13<09:41, 20.06s/it][A

tensor(0.6061, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:33<09:20, 20.01s/it][A

tensor(0.5171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [18:53<08:59, 19.99s/it][A

tensor(0.6083, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:13<08:39, 19.97s/it][A

tensor(0.6784, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:33<08:18, 19.96s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [19:53<07:59, 19.98s/it][A

tensor(0.7321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:13<07:40, 20.01s/it][A

tensor(0.6443, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:33<07:20, 20.02s/it][A

tensor(0.5709, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [20:53<07:00, 20.02s/it][A

tensor(0.6256, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:13<06:40, 20.04s/it][A

tensor(0.5836, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:33<06:20, 20.03s/it][A

tensor(0.6092, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [21:53<06:00, 20.04s/it][A

tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:13<05:40, 20.05s/it][A

tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:33<05:20, 20.06s/it][A

tensor(0.7279, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [22:53<05:00, 20.05s/it][A

tensor(0.7049, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:14<04:40, 20.07s/it][A

tensor(0.6922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:34<04:20, 20.06s/it][A

tensor(0.6051, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [23:54<04:02, 20.21s/it][A

tensor(0.5756, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:15<03:43, 20.29s/it][A

tensor(0.6099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:35<03:23, 20.33s/it][A

tensor(0.5870, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [24:55<03:02, 20.33s/it][A

tensor(0.7234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:16<02:42, 20.35s/it][A

tensor(0.5455, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:36<02:22, 20.35s/it][A

tensor(0.5668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [25:56<02:02, 20.35s/it][A

tensor(0.5500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:17<01:41, 20.34s/it][A

tensor(0.5589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:37<01:21, 20.35s/it][A

tensor(0.6223, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [26:57<01:01, 20.34s/it][A

tensor(0.6282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:18<00:40, 20.36s/it][A

tensor(0.5767, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:38<00:20, 20.36s/it][A

tensor(0.5683, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:45<00:00, 20.07s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7053, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6160358592688319

	train acc: 0.6358844468784228

	training prec: 0.8497458837400012

	training rec: 0.6358844468784228

	training f1: 0.6976088311527507

	Current Learning rate:  3.142857142857143e-05



  2%|▏         | 1/42 [00:02<01:42,  2.50s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.52s/it][A
  7%|▋         | 3/42 [00:07<01:38,  2.53s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.50s/it][A
 12%|█▏        | 5/42 [00:12<01:32,  2.51s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.53s/it][A
 17%|█▋        | 7/42 [00:17<01:27,  2.51s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.52s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.54s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.52s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.52s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.53s/it][A
 31%|███       | 13/42 [00:32<01:12,  2.52s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.53s/it][A
 36%|███▌      | 15/42 [00:37<01:08,  2.54s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.52s/it][A
 40%|████      | 17/42 [00:42<01:03,  2.53s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.54s/it][A
 45%|████▌     | 19/42 [00:47<00:57,  2.52s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation loss: 0.5921015129202888

	Validation acc: 0.6178075396825398

	Validation prec: 0.8637388835963165

	Validation rec: 0.6178075396825398

	Validation f1: 0.6859424835553414
loss: 


  1%|          | 1/83 [00:20<27:32, 20.15s/it][A

tensor(0.6331, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:14, 20.18s/it][A

tensor(0.6045, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:00<26:51, 20.15s/it][A

tensor(0.6461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:20<26:31, 20.15s/it][A

tensor(0.5587, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:40<26:11, 20.15s/it][A

tensor(0.5447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:00<25:49, 20.12s/it][A

tensor(0.6006, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:20<25:29, 20.13s/it][A

tensor(0.5979, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:41<25:10, 20.14s/it][A

tensor(0.6271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:01<24:50, 20.15s/it][A

tensor(0.5500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:21<24:29, 20.14s/it][A

tensor(0.5878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:41<24:10, 20.15s/it][A

tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:01<23:49, 20.13s/it][A

tensor(0.5020, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:21<23:28, 20.12s/it][A

tensor(0.6129, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:41<23:09, 20.13s/it][A

tensor(0.5582, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:02<22:49, 20.14s/it][A

tensor(0.5612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:22<22:27, 20.11s/it][A

tensor(0.5745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:42<22:06, 20.10s/it][A

tensor(0.6015, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:02<21:47, 20.11s/it][A

tensor(0.6261, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:22<21:27, 20.11s/it][A

tensor(0.5752, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:42<21:06, 20.11s/it][A

tensor(0.5363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:02<20:46, 20.10s/it][A

tensor(0.5441, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:22<20:25, 20.10s/it][A

tensor(0.6137, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:42<20:06, 20.11s/it][A

tensor(0.5494, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:02<19:45, 20.09s/it][A

tensor(0.7118, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:22<19:25, 20.09s/it][A

tensor(0.5551, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:43<19:06, 20.12s/it][A

tensor(0.6063, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:03<18:46, 20.12s/it][A

tensor(0.6191, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:23<18:26, 20.12s/it][A

tensor(0.5013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:43<18:06, 20.13s/it][A

tensor(0.6271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:03<17:47, 20.14s/it][A

tensor(0.5308, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:23<17:27, 20.14s/it][A

tensor(0.6809, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:44<17:07, 20.14s/it][A

tensor(0.5607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:04<16:47, 20.14s/it][A

tensor(0.7454, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:24<16:26, 20.14s/it][A

tensor(0.5281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:44<16:07, 20.15s/it][A

tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:04<15:48, 20.18s/it][A

tensor(0.6731, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:24<15:28, 20.18s/it][A

tensor(0.6953, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:45<15:09, 20.22s/it][A

tensor(0.7497, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:05<14:50, 20.23s/it][A

tensor(0.7114, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:25<14:28, 20.19s/it][A

tensor(0.5891, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:45<14:07, 20.17s/it][A

tensor(0.6124, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:05<13:47, 20.17s/it][A

tensor(0.5491, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:25<13:26, 20.15s/it][A

tensor(0.5637, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:47<13:27, 20.69s/it][A

tensor(0.6529, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:09<13:18, 21.00s/it][A

tensor(0.5903, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:30<12:55, 20.96s/it][A

tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:51<12:31, 20.88s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:11<12:08, 20.82s/it][A

tensor(0.5800, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:32<11:45, 20.76s/it][A

tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:53<11:29, 20.90s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:15<11:12, 21.01s/it][A

tensor(0.6037, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:36<10:53, 21.08s/it][A

tensor(0.5409, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:57<10:33, 21.11s/it][A

tensor(0.5922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:18<10:14, 21.18s/it][A

tensor(0.5952, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:39<09:52, 21.16s/it][A

tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:01<09:31, 21.18s/it][A

tensor(0.7475, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:22<09:10, 21.16s/it][A

tensor(0.5447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:43<08:49, 21.18s/it][A

tensor(0.5611, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:04<08:28, 21.20s/it][A

tensor(0.5352, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:25<08:08, 21.22s/it][A

tensor(0.5700, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:47<07:47, 21.23s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:08<07:25, 21.21s/it][A

tensor(0.6321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:29<07:03, 21.20s/it][A

tensor(0.7820, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:50<06:43, 21.21s/it][A

tensor(0.6629, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:11<06:21, 21.19s/it][A

tensor(0.6128, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:33<06:00, 21.18s/it][A

tensor(0.5539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:54<05:38, 21.18s/it][A

tensor(0.7037, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:15<05:17, 21.19s/it][A

tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:36<04:56, 21.20s/it][A

tensor(0.5607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:57<04:35, 21.20s/it][A

tensor(0.6633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:19<04:14, 21.19s/it][A

tensor(0.5729, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:40<03:53, 21.20s/it][A

tensor(0.6095, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:01<03:31, 21.18s/it][A

tensor(0.5249, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:22<03:10, 21.18s/it][A

tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:43<02:49, 21.17s/it][A

tensor(0.6454, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:04<02:28, 21.18s/it][A

tensor(0.6054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:26<02:07, 21.21s/it][A

tensor(0.5690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:47<01:46, 21.21s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:08<01:24, 21.04s/it][A

tensor(0.5781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:29<01:03, 21.07s/it][A

tensor(0.6952, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:50<00:42, 21.11s/it][A

tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:11<00:21, 21.13s/it][A

tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:18<00:00, 20.47s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6889, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6102547545030893

	train acc: 0.649644030668127

	training prec: 0.854765385607009

	training rec: 0.649644030668127

	training f1: 0.7100802696252502

	Current Learning rate:  3e-05



  2%|▏         | 1/42 [00:02<01:51,  2.71s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:08<01:44,  2.68s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.68s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:16<01:36,  2.67s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.68s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.67s/it][A
 21%|██▏       | 9/42 [00:24<01:28,  2.67s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.68s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.67s/it][A
 29%|██▊       | 12/42 [00:32<01:20,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.68s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:40<01:12,  2.67s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.68s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.67s/it][A
 43%|████▎     | 18/42 [00:48<01:04,  2.67s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.68s/it][A
 48%|████▊     | 20/42 [00:53<00:59,  2


	Validation loss: 0.5834458080076036

	Validation acc: 0.6259920634920635

	Validation prec: 0.8664105722941345

	Validation rec: 0.6259920634920635

	Validation f1: 0.6921157504445914
loss: 


  1%|          | 1/83 [00:21<29:00, 21.23s/it][A

tensor(0.6107, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:38, 21.22s/it][A

tensor(0.6452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:19, 21.25s/it][A

tensor(0.5504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:46, 21.10s/it][A

tensor(0.8161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:26, 21.11s/it][A

tensor(0.6020, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:08, 21.15s/it][A

tensor(0.6787, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:28<26:49, 21.18s/it][A

tensor(0.5559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:49<26:30, 21.21s/it][A

tensor(0.5682, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<26:10, 21.22s/it][A

tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:31<25:48, 21.22s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:53<25:29, 21.25s/it][A

tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:14<25:09, 21.26s/it][A

tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:35<24:47, 21.26s/it][A

tensor(0.7628, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:56<24:18, 21.14s/it][A

tensor(0.5264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:17<23:59, 21.17s/it][A

tensor(0.6302, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:39<23:41, 21.21s/it][A

tensor(0.5275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:00<23:21, 21.23s/it][A

tensor(0.5090, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:21<22:59, 21.23s/it][A

tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:42<22:36, 21.20s/it][A

tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:03<22:07, 21.06s/it][A

tensor(0.6389, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:24<21:38, 20.95s/it][A

tensor(0.6625, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:44<21:12, 20.86s/it][A

tensor(0.6718, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:05<20:47, 20.79s/it][A

tensor(0.6684, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:26<20:24, 20.75s/it][A

tensor(0.5582, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:46<20:03, 20.75s/it][A

tensor(0.6974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:07<19:40, 20.71s/it][A

tensor(0.5623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:28<19:18, 20.69s/it][A

tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:48<18:56, 20.67s/it][A

tensor(0.5329, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:09<18:34, 20.64s/it][A

tensor(0.6198, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:29<18:13, 20.63s/it][A

tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:50<17:53, 20.64s/it][A

tensor(0.6296, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:11<17:32, 20.63s/it][A

tensor(0.5748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:11, 20.62s/it][A

tensor(0.6567, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:52<16:51, 20.64s/it][A

tensor(0.6188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:13<16:30, 20.64s/it][A

tensor(0.5813, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:33<16:10, 20.65s/it][A

tensor(0.4636, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:54<15:48, 20.62s/it][A

tensor(0.7211, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:14<15:25, 20.56s/it][A

tensor(0.6229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:35<15:03, 20.53s/it][A

tensor(0.5748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:55<14:41, 20.51s/it][A

tensor(0.7159, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:16<14:21, 20.50s/it][A

tensor(0.6025, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:36<13:59, 20.48s/it][A

tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:57<13:40, 20.51s/it][A

tensor(0.6172, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:17<13:18, 20.46s/it][A

tensor(0.7738, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:38<12:57, 20.46s/it][A

tensor(0.5602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:58<12:36, 20.45s/it][A

tensor(0.6292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:18<12:16, 20.46s/it][A

tensor(0.6200, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:39<11:55, 20.44s/it][A

tensor(0.5531, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:59<11:35, 20.45s/it][A

tensor(0.6000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:20<11:15, 20.46s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:40<10:54, 20.46s/it][A

tensor(0.7342, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:01<10:36, 20.54s/it][A

tensor(0.5518, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:22<10:18, 20.60s/it][A

tensor(0.5292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:42<09:58, 20.63s/it][A

tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:03<09:37, 20.64s/it][A

tensor(0.6316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:24<09:17, 20.65s/it][A

tensor(0.7354, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:44<08:56, 20.64s/it][A

tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:05<08:36, 20.64s/it][A

tensor(0.6142, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:26<08:15, 20.63s/it][A

tensor(0.7088, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:46<07:53, 20.61s/it][A

tensor(0.5655, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:07<07:33, 20.60s/it][A

tensor(0.5856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:27<07:12, 20.58s/it][A

tensor(0.5879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:48<06:51, 20.56s/it][A

tensor(0.6397, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:08<06:30, 20.56s/it][A

tensor(0.6851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:29<06:09, 20.55s/it][A

tensor(0.7585, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:50<05:49, 20.57s/it][A

tensor(0.4925, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:10<05:28, 20.55s/it][A

tensor(0.5376, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:31<05:08, 20.55s/it][A

tensor(0.5863, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:51<04:48, 20.61s/it][A

tensor(0.5006, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:12<04:28, 20.64s/it][A

tensor(0.5905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:33<04:08, 20.68s/it][A

tensor(0.6228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:54<03:48, 20.73s/it][A

tensor(0.5809, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:14<03:27, 20.71s/it][A

tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:35<03:06, 20.70s/it][A

tensor(0.5902, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:56<02:45, 20.71s/it][A

tensor(0.5380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:16<02:24, 20.69s/it][A

tensor(0.5761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:37<02:04, 20.68s/it][A

tensor(0.7104, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:58<01:43, 20.72s/it][A

tensor(0.5460, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:19<01:22, 20.73s/it][A

tensor(0.6135, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:39<01:02, 20.73s/it][A

tensor(0.6570, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:00<00:41, 20.72s/it][A

tensor(0.5954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:21<00:20, 20.71s/it][A

tensor(0.5648, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:28<00:00, 20.58s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5296, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6169573431273541

	train acc: 0.6443558324205915

	training prec: 0.8473664470316085

	training rec: 0.6443558324205915

	training f1: 0.7056135110664518

	Current Learning rate:  2.857142857142857e-05



  2%|▏         | 1/42 [00:02<01:47,  2.62s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.62s/it][A
  7%|▋         | 3/42 [00:07<01:40,  2.58s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.59s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.60s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.61s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.61s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.59s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.60s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.61s/it][A
 29%|██▊       | 12/42 [00:31<01:17,  2.59s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.60s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:38<01:09,  2.59s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.60s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:46<01:02,  2.60s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.5984206653776623

	Validation acc: 0.5756448412698413

	Validation prec: 0.8662323973830791

	Validation rec: 0.5756448412698413

	Validation f1: 0.6482388931176063
loss: 


  1%|          | 1/83 [00:21<29:00, 21.23s/it][A

tensor(0.6375, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:34, 21.17s/it][A

tensor(0.6624, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:15, 21.20s/it][A

tensor(0.7044, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:54, 21.20s/it][A

tensor(0.5722, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:46<27:34, 21.21s/it][A

tensor(0.5284, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:07<27:12, 21.20s/it][A

tensor(0.6707, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:28<26:51, 21.21s/it][A

tensor(0.6497, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:49<26:31, 21.22s/it][A

tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<26:11, 21.23s/it][A

tensor(0.5960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:32<25:50, 21.24s/it][A

tensor(0.6254, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:53<25:29, 21.24s/it][A

tensor(0.5480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:14<25:07, 21.24s/it][A

tensor(0.6393, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:35<24:41, 21.17s/it][A

tensor(0.5242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:58<24:52, 21.63s/it][A

tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:19<24:21, 21.49s/it][A

tensor(0.6118, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:40<23:50, 21.35s/it][A

tensor(0.6529, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:01<23:22, 21.25s/it][A

tensor(0.5735, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:22<22:54, 21.15s/it][A

tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:43<22:31, 21.12s/it][A

tensor(0.5873, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:04<22:09, 21.10s/it][A

tensor(0.5394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:25<21:45, 21.05s/it][A

tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:46<21:18, 20.96s/it][A

tensor(0.5007, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:07<20:57, 20.96s/it][A

tensor(0.6073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:28<20:37, 20.98s/it][A

tensor(0.7269, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:49<20:16, 20.98s/it][A

tensor(0.6085, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:10<19:56, 20.99s/it][A

tensor(0.5869, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:31<19:34, 20.98s/it][A

tensor(0.6080, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:50<18:49, 20.54s/it][A

tensor(0.6074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:11<18:28, 20.53s/it][A

tensor(0.6269, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:32<18:12, 20.62s/it][A

tensor(0.5741, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:52<17:56, 20.70s/it][A

tensor(0.6176, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:13<17:38, 20.76s/it][A

tensor(0.7126, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:34<17:20, 20.81s/it][A

tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:54<16:37, 20.37s/it][A

tensor(0.5923, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:24, 20.51s/it][A

tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:35<16:10, 20.64s/it][A

tensor(0.6096, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:56<15:52, 20.71s/it][A

tensor(0.5441, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:17<15:37, 20.83s/it][A

tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:38<15:17, 20.85s/it][A

tensor(0.5862, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<14:57, 20.87s/it][A

tensor(0.6846, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<14:35, 20.85s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:41<14:16, 20.89s/it][A

tensor(0.6692, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:02<13:55, 20.89s/it][A

tensor(0.6038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:23<13:35, 20.90s/it][A

tensor(0.5256, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:44<13:13, 20.89s/it][A

tensor(0.7223, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:05<12:54, 20.93s/it][A

tensor(0.4735, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:33, 20.92s/it][A

tensor(0.5888, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:47<12:12, 20.94s/it][A

tensor(0.5981, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:07<11:51, 20.93s/it][A

tensor(0.6614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:28<11:30, 20.94s/it][A

tensor(0.5316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:49<11:09, 20.94s/it][A

tensor(0.5454, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:10<10:48, 20.91s/it][A

tensor(0.5747, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:31<10:28, 20.95s/it][A

tensor(0.5312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:52<10:08, 20.97s/it][A

tensor(0.5721, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:13<09:46, 20.94s/it][A

tensor(0.5906, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:34<09:25, 20.95s/it][A

tensor(0.5707, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:55<09:04, 20.94s/it][A

tensor(0.5349, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:16<08:43, 20.96s/it][A

tensor(0.6897, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:37<08:22, 20.96s/it][A

tensor(0.5784, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:58<08:01, 20.92s/it][A

tensor(0.6690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:19<07:40, 20.93s/it][A

tensor(0.6194, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:40<07:19, 20.95s/it][A

tensor(0.5687, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:59<06:49, 20.47s/it][A

tensor(0.6512, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:20<06:31, 20.62s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:41<06:13, 20.72s/it][A

tensor(0.7522, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:02<05:53, 20.80s/it][A

tensor(0.5863, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:22<05:29, 20.62s/it][A

tensor(0.6992, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:42<05:07, 20.47s/it][A

tensor(0.6050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:03<04:45, 20.41s/it][A

tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:23<04:24, 20.37s/it][A

tensor(0.5705, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:43<04:03, 20.27s/it][A

tensor(0.5176, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:03<03:43, 20.29s/it][A

tensor(0.5774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:24<03:23, 20.31s/it][A

tensor(0.6209, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:44<03:03, 20.34s/it][A

tensor(0.5967, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:04<02:42, 20.34s/it][A

tensor(0.5971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:25<02:22, 20.35s/it][A

tensor(0.5600, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:45<02:02, 20.35s/it][A

tensor(0.5797, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:05<01:41, 20.35s/it][A

tensor(0.5372, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:26<01:21, 20.36s/it][A

tensor(0.7088, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:46<01:01, 20.34s/it][A

tensor(0.6181, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:06<00:40, 20.33s/it][A

tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:27<00:20, 20.33s/it][A

tensor(0.5980, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:34<00:00, 20.65s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5356, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6060431398541094

	train acc: 0.6505339539978094

	training prec: 0.8532708240283757

	training rec: 0.6505339539978094

	training f1: 0.7105146474259197

	Current Learning rate:  2.714285714285714e-05



  2%|▏         | 1/42 [00:02<01:41,  2.47s/it][A
  5%|▍         | 2/42 [00:05<01:40,  2.51s/it][A
  7%|▋         | 3/42 [00:07<01:38,  2.52s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.51s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.53s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.53s/it][A
 17%|█▋        | 7/42 [00:17<01:27,  2.51s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.52s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.52s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.50s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.51s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.52s/it][A
 31%|███       | 13/42 [00:32<01:12,  2.50s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.50s/it][A
 36%|███▌      | 15/42 [00:37<01:07,  2.51s/it][A
 38%|███▊      | 16/42 [00:40<01:04,  2.50s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.51s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.52s/it][A
 45%|████▌     | 19/42 [00:47<00:57,  2.50s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation loss: 0.5874522789603188

	Validation acc: 0.5979662698412698

	Validation prec: 0.8657991645517343

	Validation rec: 0.5979662698412698

	Validation f1: 0.6666212926855791
loss: 


  1%|          | 1/83 [00:20<28:00, 20.50s/it][A

tensor(0.5390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:29, 20.37s/it][A

tensor(0.5766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:06, 20.33s/it][A

tensor(0.6189, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:21<26:43, 20.30s/it][A

tensor(0.6754, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:41<26:22, 20.28s/it][A

tensor(0.5955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:01<26:03, 20.30s/it][A

tensor(0.6839, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:22<25:43, 20.31s/it][A

tensor(0.7371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:42<25:26, 20.35s/it][A

tensor(0.6463, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:02<25:05, 20.34s/it][A

tensor(0.6510, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:23<24:43, 20.33s/it][A

tensor(0.5768, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:43<24:24, 20.35s/it][A

tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:04<24:05, 20.35s/it][A

tensor(0.6082, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:24<23:44, 20.35s/it][A

tensor(0.5356, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:44<23:24, 20.35s/it][A

tensor(0.6008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:05<23:05, 20.38s/it][A

tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:25<22:44, 20.37s/it][A

tensor(0.5092, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:45<22:23, 20.35s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:06<22:03, 20.36s/it][A

tensor(0.4712, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:26<21:42, 20.36s/it][A

tensor(0.5595, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:46<21:22, 20.35s/it][A

tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:07<21:00, 20.34s/it][A

tensor(0.5555, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:27<20:39, 20.32s/it][A

tensor(0.5534, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:47<20:20, 20.34s/it][A

tensor(0.5575, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:08<19:59, 20.34s/it][A

tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:28<19:40, 20.35s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:48<19:19, 20.34s/it][A

tensor(0.6205, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:09<19:00, 20.36s/it][A

tensor(0.5823, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:29<18:39, 20.36s/it][A

tensor(0.6260, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:50<18:19, 20.37s/it][A

tensor(0.6169, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:10<18:00, 20.39s/it][A

tensor(0.6736, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:30<17:39, 20.37s/it][A

tensor(0.5865, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:51<17:19, 20.38s/it][A

tensor(0.7218, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:11<16:59, 20.40s/it][A

tensor(0.5994, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:32<16:38, 20.39s/it][A

tensor(0.6859, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:52<16:17, 20.36s/it][A

tensor(0.6126, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:12<15:56, 20.34s/it][A

tensor(0.4610, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:32<15:35, 20.34s/it][A

tensor(0.5720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:53<15:14, 20.33s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:13<14:53, 20.32s/it][A

tensor(0.6057, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:33<14:33, 20.32s/it][A

tensor(0.5670, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:54<14:14, 20.35s/it][A

tensor(0.5658, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:14<13:53, 20.34s/it][A

tensor(0.6463, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:34<13:33, 20.33s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:55<13:13, 20.34s/it][A

tensor(0.5903, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:15<12:52, 20.32s/it][A

tensor(0.6367, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:35<12:32, 20.33s/it][A

tensor(0.5974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:56<12:12, 20.34s/it][A

tensor(0.5801, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:16<11:52, 20.36s/it][A

tensor(0.7191, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:37<11:32, 20.36s/it][A

tensor(0.6828, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:57<11:12, 20.37s/it][A

tensor(0.5148, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:17<10:51, 20.35s/it][A

tensor(0.5833, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:38<10:31, 20.37s/it][A

tensor(0.6701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:58<10:10, 20.34s/it][A

tensor(0.6007, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:18<09:49, 20.33s/it][A

tensor(0.5866, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:39<09:29, 20.34s/it][A

tensor(0.6545, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [18:59<09:09, 20.34s/it][A

tensor(0.5347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:19<08:49, 20.35s/it][A

tensor(0.5484, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:40<08:29, 20.37s/it][A

tensor(0.6105, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:00<08:08, 20.37s/it][A

tensor(0.5489, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:21<07:49, 20.39s/it][A

tensor(0.5446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:41<07:29, 20.41s/it][A

tensor(0.5330, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:01<07:08, 20.41s/it][A

tensor(0.5690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:22<06:48, 20.42s/it][A

tensor(0.5826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:42<06:28, 20.44s/it][A

tensor(0.5536, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:03<06:07, 20.40s/it][A

tensor(0.6470, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:23<05:46, 20.40s/it][A

tensor(0.5552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:43<05:25, 20.37s/it][A

tensor(0.6513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:04<05:05, 20.37s/it][A

tensor(0.5805, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:24<04:45, 20.36s/it][A

tensor(0.6784, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:44<04:24, 20.35s/it][A

tensor(0.7036, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:05<04:04, 20.35s/it][A

tensor(0.7099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:25<03:43, 20.35s/it][A

tensor(0.6915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:45<03:23, 20.36s/it][A

tensor(0.6530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:06<03:03, 20.36s/it][A

tensor(0.7081, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:26<02:42, 20.36s/it][A

tensor(0.5568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:46<02:22, 20.33s/it][A

tensor(0.7265, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:07<02:02, 20.36s/it][A

tensor(0.5547, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:27<01:41, 20.34s/it][A

tensor(0.7601, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:48<01:21, 20.34s/it][A

tensor(0.5891, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:08<01:01, 20.35s/it][A

tensor(0.7363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:28<00:40, 20.36s/it][A

tensor(0.6032, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:49<00:20, 20.35s/it][A

tensor(0.5891, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:56<00:00, 20.19s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5706, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6096111471394459

	train acc: 0.6484802847754655

	training prec: 0.8511644894767825

	training rec: 0.6484802847754655

	training f1: 0.7089528089653089

	Current Learning rate:  2.5714285714285714e-05



  2%|▏         | 1/42 [00:02<01:44,  2.55s/it][A
  5%|▍         | 2/42 [00:05<01:39,  2.50s/it][A
  7%|▋         | 3/42 [00:07<01:38,  2.52s/it][A
 10%|▉         | 4/42 [00:10<01:36,  2.53s/it][A
 12%|█▏        | 5/42 [00:12<01:32,  2.51s/it][A
 14%|█▍        | 6/42 [00:15<01:30,  2.52s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.52s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.51s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.52s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.52s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.51s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.52s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.53s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.51s/it][A
 36%|███▌      | 15/42 [00:37<01:08,  2.52s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.53s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.51s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.53s/it][A
 45%|████▌     | 19/42 [00:47<00:58,  2.54s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation loss: 0.5892695855526697

	Validation acc: 0.6081349206349207

	Validation prec: 0.8675976933650357

	Validation rec: 0.6081349206349207

	Validation f1: 0.6782432918126593
loss: 


  1%|          | 1/83 [00:20<27:48, 20.34s/it][A

tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:28, 20.35s/it][A

tensor(0.5586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:08, 20.35s/it][A

tensor(0.5613, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:21<26:47, 20.35s/it][A

tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:41<26:27, 20.36s/it][A

tensor(0.6501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:02<26:06, 20.35s/it][A

tensor(0.7188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:22<25:44, 20.32s/it][A

tensor(0.5793, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:42<25:25, 20.35s/it][A

tensor(0.6394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:03<25:08, 20.38s/it][A

tensor(0.5675, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:23<24:47, 20.38s/it][A

tensor(0.5777, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:43<24:26, 20.37s/it][A

tensor(0.5363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:04<24:07, 20.38s/it][A

tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:24<23:47, 20.40s/it][A

tensor(0.4963, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:45<23:26, 20.38s/it][A

tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:05<23:04, 20.36s/it][A

tensor(0.5304, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:25<22:44, 20.37s/it][A

tensor(0.6205, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:46<22:23, 20.35s/it][A

tensor(0.5775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:06<22:03, 20.36s/it][A

tensor(0.6188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:26<21:44, 20.38s/it][A

tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:47<21:24, 20.39s/it][A

tensor(0.6920, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:07<21:04, 20.39s/it][A

tensor(0.5617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:28<20:42, 20.37s/it][A

tensor(0.5879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:48<20:21, 20.36s/it][A

tensor(0.6048, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:08<19:59, 20.34s/it][A

tensor(0.7267, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:28<19:38, 20.32s/it][A

tensor(0.5819, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:49<19:18, 20.32s/it][A

tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:09<18:58, 20.33s/it][A

tensor(0.7297, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:29<18:38, 20.33s/it][A

tensor(0.5102, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:50<18:18, 20.34s/it][A

tensor(0.7026, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:10<17:59, 20.37s/it][A

tensor(0.5255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:31<17:39, 20.37s/it][A

tensor(0.5733, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:51<17:18, 20.37s/it][A

tensor(0.6454, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:11<16:58, 20.36s/it][A

tensor(0.6312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:32<16:37, 20.36s/it][A

tensor(0.6339, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:52<16:16, 20.35s/it][A

tensor(0.7016, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:12<15:56, 20.36s/it][A

tensor(0.5668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:33<15:36, 20.35s/it][A

tensor(0.6584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:53<15:13, 20.31s/it][A

tensor(0.5202, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:13<14:53, 20.30s/it][A

tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:34<14:33, 20.31s/it][A

tensor(0.5271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:54<14:14, 20.35s/it][A

tensor(0.5344, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:14<13:55, 20.37s/it][A

tensor(0.6000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:35<13:35, 20.38s/it][A

tensor(0.5872, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:55<13:14, 20.37s/it][A

tensor(0.5989, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:16<12:54, 20.38s/it][A

tensor(0.5704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:36<12:34, 20.39s/it][A

tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:57<12:15, 20.42s/it][A

tensor(0.6730, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:17<11:54, 20.43s/it][A

tensor(0.6009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:37<11:34, 20.43s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:58<11:14, 20.44s/it][A

tensor(0.5823, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:18<10:53, 20.44s/it][A

tensor(0.6187, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:39<10:33, 20.45s/it][A

tensor(0.5498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:59<10:13, 20.44s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:20<09:52, 20.45s/it][A

tensor(0.5556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:40<09:32, 20.46s/it][A

tensor(0.5689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:01<09:12, 20.46s/it][A

tensor(0.5774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:21<08:51, 20.46s/it][A

tensor(0.5885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:41<08:31, 20.45s/it][A

tensor(0.6916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:02<08:10, 20.44s/it][A

tensor(0.5692, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:22<07:49, 20.41s/it][A

tensor(0.6509, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:43<07:29, 20.42s/it][A

tensor(0.5735, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:03<07:08, 20.43s/it][A

tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:24<06:48, 20.43s/it][A

tensor(0.6994, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:44<06:28, 20.44s/it][A

tensor(0.6592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:04<06:08, 20.45s/it][A

tensor(0.5595, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:25<05:47, 20.42s/it][A

tensor(0.5353, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:45<05:26, 20.42s/it][A

tensor(0.7273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:06<05:06, 20.42s/it][A

tensor(0.5514, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:26<04:46, 20.43s/it][A

tensor(0.7251, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:47<04:25, 20.44s/it][A

tensor(0.5250, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:07<04:05, 20.45s/it][A

tensor(0.6097, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:28<03:44, 20.45s/it][A

tensor(0.6763, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:48<03:24, 20.44s/it][A

tensor(0.6424, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:08<03:03, 20.44s/it][A

tensor(0.4571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:29<02:43, 20.45s/it][A

tensor(0.5791, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:49<02:23, 20.47s/it][A

tensor(0.6237, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:10<02:02, 20.46s/it][A

tensor(0.6101, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:30<01:42, 20.46s/it][A

tensor(0.5624, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:51<01:21, 20.46s/it][A

tensor(0.6423, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:11<01:01, 20.46s/it][A

tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:32<00:40, 20.45s/it][A

tensor(0.6088, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:52<00:20, 20.46s/it][A

tensor(0.5564, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:59<00:00, 20.24s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.8788, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6090405515159469

	train acc: 0.6558563800657174

	training prec: 0.8489693034131961

	training rec: 0.6558563800657174

	training f1: 0.7140678142221717

	Current Learning rate:  2.4285714285714288e-05



  2%|▏         | 1/42 [00:02<01:45,  2.57s/it][A
  5%|▍         | 2/42 [00:05<01:42,  2.57s/it][A
  7%|▋         | 3/42 [00:07<01:38,  2.53s/it][A
 10%|▉         | 4/42 [00:10<01:36,  2.54s/it][A
 12%|█▏        | 5/42 [00:12<01:34,  2.54s/it][A
 14%|█▍        | 6/42 [00:15<01:30,  2.53s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.54s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.54s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.52s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.54s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.54s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.52s/it][A
 31%|███       | 13/42 [00:32<01:13,  2.53s/it][A
 33%|███▎      | 14/42 [00:35<01:11,  2.54s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.54s/it][A
 38%|███▊      | 16/42 [00:40<01:06,  2.54s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.55s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.53s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.54s/it][A
 48%|████▊     | 20/42 [00:50<00:56,  2


	Validation loss: 0.5955045102607637

	Validation acc: 0.5930059523809523

	Validation prec: 0.8639577374830028

	Validation rec: 0.5930059523809523

	Validation f1: 0.6610552735218599
loss: 


  1%|          | 1/83 [00:20<27:57, 20.46s/it][A

tensor(0.5854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:35, 20.44s/it][A

tensor(0.6800, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:32, 20.66s/it][A

tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:20, 20.76s/it][A

tensor(0.7244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<27:05, 20.84s/it][A

tensor(0.6975, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:46, 20.86s/it][A

tensor(0.6577, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:26, 20.88s/it][A

tensor(0.6004, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:05, 20.87s/it][A

tensor(0.5091, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:46, 20.89s/it][A

tensor(0.6001, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:25, 20.90s/it][A

tensor(0.6312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:04, 20.89s/it][A

tensor(0.6203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:09<24:42, 20.88s/it][A

tensor(0.5637, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:30<24:21, 20.88s/it][A

tensor(0.6336, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:51<24:00, 20.87s/it][A

tensor(0.5639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:12<23:39, 20.87s/it][A

tensor(0.5835, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:33<23:19, 20.89s/it][A

tensor(0.5678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:53<22:45, 20.69s/it][A

tensor(0.6222, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:14<22:18, 20.58s/it][A

tensor(0.5813, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:34<21:52, 20.50s/it][A

tensor(0.7212, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:54<21:30, 20.48s/it][A

tensor(0.5393, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:15<21:07, 20.44s/it][A

tensor(0.6207, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:35<20:44, 20.40s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:55<20:23, 20.39s/it][A

tensor(0.5674, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:16<20:02, 20.38s/it][A

tensor(0.7120, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:36<19:40, 20.36s/it][A

tensor(0.5788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:56<19:19, 20.33s/it][A

tensor(0.7098, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:17<18:58, 20.32s/it][A

tensor(0.6593, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:37<18:33, 20.25s/it][A

tensor(0.5564, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:57<18:14, 20.27s/it][A

tensor(0.7058, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:17<17:55, 20.29s/it][A

tensor(0.6847, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:38<17:35, 20.29s/it][A

tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:58<17:16, 20.33s/it][A

tensor(0.6074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:18<16:55, 20.32s/it][A

tensor(0.5891, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:39<16:36, 20.35s/it][A

tensor(0.6024, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:59<16:16, 20.34s/it][A

tensor(0.6305, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:19<15:56, 20.36s/it][A

tensor(0.6785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:40<15:34, 20.32s/it][A

tensor(0.5689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:00<15:15, 20.33s/it][A

tensor(0.6147, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:20<14:55, 20.35s/it][A

tensor(0.5603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:41<14:34, 20.34s/it][A

tensor(0.6587, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:01<14:13, 20.32s/it][A

tensor(0.7160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:22<13:55, 20.39s/it][A

tensor(0.5924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:42<13:35, 20.38s/it][A

tensor(0.5119, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:02<13:13, 20.35s/it][A

tensor(0.6014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:22<12:51, 20.31s/it][A

tensor(0.5391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:43<12:32, 20.33s/it][A

tensor(0.5056, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:03<12:11, 20.31s/it][A

tensor(0.6198, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:23<11:50, 20.30s/it][A

tensor(0.5840, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:44<11:30, 20.30s/it][A

tensor(0.5383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:04<11:10, 20.33s/it][A

tensor(0.6822, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:24<10:51, 20.37s/it][A

tensor(0.5038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:45<10:31, 20.37s/it][A

tensor(0.6903, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:05<10:11, 20.40s/it][A

tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:26<09:51, 20.40s/it][A

tensor(0.6545, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:46<09:31, 20.40s/it][A

tensor(0.5966, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:06<09:10, 20.37s/it][A

tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:27<08:49, 20.36s/it][A

tensor(0.5217, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:47<08:28, 20.36s/it][A

tensor(0.7046, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:07<08:08, 20.34s/it][A

tensor(0.6581, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:28<07:48, 20.36s/it][A

tensor(0.4609, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:48<07:26, 20.30s/it][A

tensor(0.6033, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:08<07:06, 20.32s/it][A

tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:29<06:47, 20.35s/it][A

tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:49<06:27, 20.38s/it][A

tensor(0.7001, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:10<06:07, 20.43s/it][A

tensor(0.5760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:30<05:47, 20.43s/it][A

tensor(0.6319, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:51<05:27, 20.44s/it][A

tensor(0.5472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:11<05:06, 20.43s/it][A

tensor(0.5158, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:31<04:45, 20.40s/it][A

tensor(0.5775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:52<04:24, 20.37s/it][A

tensor(0.5266, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:12<04:04, 20.37s/it][A

tensor(0.5706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:32<03:43, 20.36s/it][A

tensor(0.5551, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:53<03:23, 20.36s/it][A

tensor(0.5319, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:13<03:03, 20.35s/it][A

tensor(0.5878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:33<02:42, 20.34s/it][A

tensor(0.7620, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:54<02:22, 20.33s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:14<02:02, 20.33s/it][A

tensor(0.5526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:34<01:41, 20.35s/it][A

tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:55<01:21, 20.35s/it][A

tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:15<01:00, 20.33s/it][A

tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:35<00:40, 20.34s/it][A

tensor(0.5931, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:56<00:20, 20.34s/it][A

tensor(0.5930, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:03<00:00, 20.28s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5461, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6087494115513491

	train acc: 0.643209200438116

	training prec: 0.8485459891971294

	training rec: 0.643209200438116

	training f1: 0.7046209906902694

	Current Learning rate:  2.2857142857142858e-05



  2%|▏         | 1/42 [00:02<01:42,  2.50s/it][A
  5%|▍         | 2/42 [00:05<01:41,  2.54s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.55s/it][A
 10%|▉         | 4/42 [00:10<01:36,  2.53s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.54s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.55s/it][A
 17%|█▋        | 7/42 [00:17<01:28,  2.53s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.54s/it][A
 21%|██▏       | 9/42 [00:22<01:24,  2.55s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.53s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.54s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.55s/it][A
 31%|███       | 13/42 [00:33<01:13,  2.54s/it][A
 33%|███▎      | 14/42 [00:35<01:11,  2.54s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.55s/it][A
 38%|███▊      | 16/42 [00:40<01:06,  2.54s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.55s/it][A
 43%|████▎     | 18/42 [00:45<01:01,  2.55s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.54s/it][A
 48%|████▊     | 20/42 [00:50<00:56,  2


	Validation loss: 0.5820278347957701

	Validation acc: 0.6309523809523809

	Validation prec: 0.8612857345462014

	Validation rec: 0.6309523809523809

	Validation f1: 0.6941094514292039
loss: 


  1%|          | 1/83 [00:20<27:51, 20.38s/it][A

tensor(0.6807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:31, 20.38s/it][A

tensor(0.6386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:08, 20.36s/it][A

tensor(0.5360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:21<26:45, 20.32s/it][A

tensor(0.5774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:41<26:26, 20.34s/it][A

tensor(0.5568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:02<26:05, 20.34s/it][A

tensor(0.5287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:22<25:46, 20.35s/it][A

tensor(0.6868, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:42<25:25, 20.34s/it][A

tensor(0.5362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:03<25:05, 20.35s/it][A

tensor(0.5449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:23<24:44, 20.34s/it][A

tensor(0.5973, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:43<24:23, 20.33s/it][A

tensor(0.5733, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:04<24:02, 20.32s/it][A

tensor(0.6122, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:24<23:43, 20.33s/it][A

tensor(0.5397, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:44<23:21, 20.31s/it][A

tensor(0.5816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:05<23:02, 20.33s/it][A

tensor(0.5150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:25<22:42, 20.33s/it][A

tensor(0.5827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:45<22:21, 20.32s/it][A

tensor(0.5979, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:05<21:59, 20.30s/it][A

tensor(0.5216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:26<21:40, 20.31s/it][A

tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:46<21:19, 20.32s/it][A

tensor(0.6643, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:06<20:59, 20.31s/it][A

tensor(0.6868, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:27<20:38, 20.30s/it][A

tensor(0.5240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:47<20:18, 20.30s/it][A

tensor(0.5606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:07<19:58, 20.32s/it][A

tensor(0.5942, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:28<19:38, 20.31s/it][A

tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:48<19:17, 20.30s/it][A

tensor(0.6915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:08<18:58, 20.32s/it][A

tensor(0.6192, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:29<18:37, 20.31s/it][A

tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:49<18:16, 20.31s/it][A

tensor(0.5795, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:09<17:57, 20.33s/it][A

tensor(0.5872, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:30<17:36, 20.32s/it][A

tensor(0.5723, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:50<17:15, 20.31s/it][A

tensor(0.6529, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:10<16:55, 20.30s/it][A

tensor(0.5698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:30<16:34, 20.29s/it][A

tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:51<16:13, 20.28s/it][A

tensor(0.6787, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:11<15:54, 20.31s/it][A

tensor(0.6257, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:31<15:34, 20.31s/it][A

tensor(0.6671, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:52<15:14, 20.31s/it][A

tensor(0.5816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:12<14:54, 20.33s/it][A

tensor(0.5261, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:32<14:33, 20.31s/it][A

tensor(0.5929, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:53<14:12, 20.31s/it][A

tensor(0.6336, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:13<13:53, 20.34s/it][A

tensor(0.6209, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:33<13:33, 20.35s/it][A

tensor(0.5328, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:54<13:13, 20.35s/it][A

tensor(0.5868, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:14<12:53, 20.37s/it][A

tensor(0.4813, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:35<12:34, 20.39s/it][A

tensor(0.6342, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:55<12:13, 20.38s/it][A

tensor(0.5784, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:15<11:53, 20.39s/it][A

tensor(0.6086, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:36<11:32, 20.37s/it][A

tensor(0.5765, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:56<11:11, 20.36s/it][A

tensor(0.5161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:16<10:51, 20.35s/it][A

tensor(0.5296, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:37<10:30, 20.34s/it][A

tensor(0.6213, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:57<10:09, 20.31s/it][A

tensor(0.7272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:17<09:49, 20.33s/it][A

tensor(0.7099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:38<09:29, 20.32s/it][A

tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [18:58<09:08, 20.31s/it][A

tensor(0.6431, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:18<08:48, 20.32s/it][A

tensor(0.6010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:39<08:28, 20.33s/it][A

tensor(0.5898, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [19:59<08:07, 20.32s/it][A

tensor(0.6661, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:19<07:47, 20.32s/it][A

tensor(0.6017, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:40<07:27, 20.34s/it][A

tensor(0.6142, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:00<07:07, 20.34s/it][A

tensor(0.6385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:20<06:46, 20.33s/it][A

tensor(0.7375, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:41<06:26, 20.33s/it][A

tensor(0.5557, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:01<06:05, 20.32s/it][A

tensor(0.6792, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:21<05:45, 20.32s/it][A

tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:41<05:25, 20.32s/it][A

tensor(0.6872, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:02<05:05, 20.34s/it][A

tensor(0.5794, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:22<04:44, 20.33s/it][A

tensor(0.6292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:42<04:24, 20.33s/it][A

tensor(0.6745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:03<04:03, 20.31s/it][A

tensor(0.5095, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:23<03:43, 20.32s/it][A

tensor(0.7199, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:43<03:23, 20.32s/it][A

tensor(0.6357, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:04<03:03, 20.34s/it][A

tensor(0.5378, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:24<02:42, 20.34s/it][A

tensor(0.6999, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:44<02:22, 20.34s/it][A

tensor(0.5824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:05<02:02, 20.35s/it][A

tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:25<01:41, 20.34s/it][A

tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [26:46<01:21, 20.35s/it][A

tensor(0.7274, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:06<01:01, 20.37s/it][A

tensor(0.5755, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:26<00:40, 20.35s/it][A

tensor(0.6489, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [27:47<00:20, 20.33s/it][A

tensor(0.5689, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [27:54<00:00, 20.17s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6087301046733397

	train acc: 0.6535631161007667

	training prec: 0.8526819372097298

	training rec: 0.6535631161007667

	training f1: 0.7122345456925313

	Current Learning rate:  2.1428571428571428e-05



  2%|▏         | 1/42 [00:02<01:45,  2.58s/it][A
  5%|▍         | 2/42 [00:05<01:41,  2.53s/it][A
  7%|▋         | 3/42 [00:07<01:39,  2.55s/it][A
 10%|▉         | 4/42 [00:10<01:37,  2.56s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.53s/it][A
 14%|█▍        | 6/42 [00:15<01:31,  2.54s/it][A
 17%|█▋        | 7/42 [00:17<01:29,  2.55s/it][A
 19%|█▉        | 8/42 [00:20<01:26,  2.53s/it][A
 21%|██▏       | 9/42 [00:22<01:23,  2.54s/it][A
 24%|██▍       | 10/42 [00:25<01:21,  2.55s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.54s/it][A
 29%|██▊       | 12/42 [00:30<01:16,  2.54s/it][A
 31%|███       | 13/42 [00:33<01:13,  2.55s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.53s/it][A
 36%|███▌      | 15/42 [00:38<01:08,  2.54s/it][A
 38%|███▊      | 16/42 [00:40<01:06,  2.55s/it][A
 40%|████      | 17/42 [00:43<01:03,  2.53s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.54s/it][A
 45%|████▌     | 19/42 [00:48<00:58,  2.55s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation loss: 0.5927822596970058

	Validation acc: 0.5786210317460317

	Validation prec: 0.8506636411158747

	Validation rec: 0.5786210317460317

	Validation f1: 0.6458765196319621
loss: 


  1%|          | 1/83 [00:20<27:48, 20.35s/it][A

tensor(0.6016, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:26, 20.33s/it][A

tensor(0.5255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:00<27:06, 20.33s/it][A

tensor(0.7390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:21<26:47, 20.35s/it][A

tensor(0.5554, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:41<26:25, 20.33s/it][A

tensor(0.5428, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:02<26:05, 20.33s/it][A

tensor(0.5211, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:22<25:45, 20.33s/it][A

tensor(0.5769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:42<25:25, 20.34s/it][A

tensor(0.6970, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:03<25:05, 20.34s/it][A

tensor(0.6273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:23<24:45, 20.35s/it][A

tensor(0.6059, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:43<24:24, 20.34s/it][A

tensor(0.6617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:04<24:03, 20.33s/it][A

tensor(0.6964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:24<23:42, 20.32s/it][A

tensor(0.6074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:44<23:21, 20.31s/it][A

tensor(0.5550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:04<23:01, 20.32s/it][A

tensor(0.5762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:25<22:41, 20.33s/it][A

tensor(0.5828, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:45<22:20, 20.31s/it][A

tensor(0.5530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:05<22:01, 20.33s/it][A

tensor(0.5473, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:26<21:40, 20.32s/it][A

tensor(0.5816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:46<21:20, 20.32s/it][A

tensor(0.6670, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:06<21:00, 20.33s/it][A

tensor(0.7521, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:27<20:39, 20.32s/it][A

tensor(0.5470, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:47<20:18, 20.31s/it][A

tensor(0.5925, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:07<19:57, 20.30s/it][A

tensor(0.5472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:28<19:38, 20.31s/it][A

tensor(0.5787, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:48<19:17, 20.30s/it][A

tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:08<18:57, 20.32s/it][A

tensor(0.6008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:29<18:38, 20.33s/it][A

tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:49<18:17, 20.32s/it][A

tensor(0.7424, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:09<17:55, 20.29s/it][A

tensor(0.6162, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:29<17:35, 20.29s/it][A

tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:50<17:14, 20.29s/it][A

tensor(0.6163, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:10<16:55, 20.31s/it][A

tensor(0.5014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:30<16:34, 20.31s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:51<16:14, 20.29s/it][A

tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:11<15:53, 20.29s/it][A

tensor(0.7220, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:31<15:32, 20.28s/it][A

tensor(0.5813, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [12:51<15:12, 20.27s/it][A

tensor(0.5754, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:12<14:52, 20.29s/it][A

tensor(0.7353, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:32<14:33, 20.31s/it][A

tensor(0.4289, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [13:52<14:12, 20.31s/it][A

tensor(0.6107, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:13<13:51, 20.29s/it][A

tensor(0.5572, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:33<13:31, 20.29s/it][A

tensor(0.5828, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [14:53<13:10, 20.28s/it][A

tensor(0.5380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:13<12:50, 20.28s/it][A

tensor(0.6004, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:34<12:29, 20.26s/it][A

tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [15:54<12:09, 20.26s/it][A

tensor(0.8394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:15<11:52, 20.36s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:35<11:38, 20.54s/it][A

tensor(0.5147, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [16:56<11:21, 20.66s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:18<11:04, 20.78s/it][A

tensor(0.7271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:38<10:45, 20.82s/it][A

tensor(0.5161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [17:59<10:25, 20.84s/it][A

tensor(0.5569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:20<09:59, 20.67s/it][A

tensor(0.6388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:41<09:41, 20.75s/it][A

tensor(0.5293, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:01<09:21, 20.80s/it][A

tensor(0.5761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:22<09:01, 20.83s/it][A

tensor(0.6995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:42<08:28, 20.33s/it][A

tensor(0.6030, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:03<08:13, 20.55s/it][A

tensor(0.5598, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:23<07:54, 20.65s/it][A

tensor(0.5776, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:44<07:35, 20.72s/it][A

tensor(0.6110, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:05<07:16, 20.76s/it][A

tensor(0.5985, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:26<06:56, 20.82s/it][A

tensor(0.7042, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:47<06:36, 20.85s/it][A

tensor(0.5316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:08<06:16, 20.89s/it][A

tensor(0.5151, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:29<05:55, 20.91s/it][A

tensor(0.6674, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [22:50<05:34, 20.91s/it][A

tensor(0.5615, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:11<05:13, 20.93s/it][A

tensor(0.7108, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:32<04:53, 20.93s/it][A

tensor(0.5277, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [23:53<04:32, 20.94s/it][A

tensor(0.6071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:14<04:11, 20.94s/it][A

tensor(0.5703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:35<03:50, 20.97s/it][A

tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [24:56<03:29, 20.99s/it][A

tensor(0.6355, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:17<03:08, 20.96s/it][A

tensor(0.6703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:38<02:47, 20.97s/it][A

tensor(0.6336, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [25:59<02:26, 20.94s/it][A

tensor(0.5164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:19<02:05, 20.93s/it][A

tensor(0.6380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:40<01:43, 20.70s/it][A

tensor(0.5555, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:00<01:22, 20.54s/it][A

tensor(0.5763, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:20<01:01, 20.44s/it][A

tensor(0.6895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:40<00:40, 20.39s/it][A

tensor(0.6867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:01<00:20, 20.35s/it][A

tensor(0.5355, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:08<00:00, 20.34s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6239, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6086070440619825

	train acc: 0.6511329408543264

	training prec: 0.8576133983300183

	training rec: 0.6511329408543264

	training f1: 0.7121885018455777

	Current Learning rate:  2e-05



  2%|▏         | 1/42 [00:02<01:44,  2.54s/it][A
  5%|▍         | 2/42 [00:05<01:41,  2.54s/it][A
  7%|▋         | 3/42 [00:07<01:37,  2.50s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.52s/it][A
 12%|█▏        | 5/42 [00:12<01:33,  2.52s/it][A
 14%|█▍        | 6/42 [00:15<01:29,  2.50s/it][A
 17%|█▋        | 7/42 [00:17<01:27,  2.51s/it][A
 19%|█▉        | 8/42 [00:20<01:25,  2.52s/it][A
 21%|██▏       | 9/42 [00:22<01:22,  2.50s/it][A
 24%|██▍       | 10/42 [00:25<01:20,  2.51s/it][A
 26%|██▌       | 11/42 [00:27<01:18,  2.52s/it][A
 29%|██▊       | 12/42 [00:30<01:15,  2.50s/it][A
 31%|███       | 13/42 [00:32<01:12,  2.51s/it][A
 33%|███▎      | 14/42 [00:35<01:10,  2.52s/it][A
 36%|███▌      | 15/42 [00:37<01:07,  2.50s/it][A
 38%|███▊      | 16/42 [00:40<01:05,  2.51s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.52s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.50s/it][A
 45%|████▌     | 19/42 [00:47<00:57,  2.51s/it][A
 48%|████▊     | 20/42 [00:50<00:55,  2


	Validation loss: 0.5797574775559562

	Validation acc: 0.6413690476190477

	Validation prec: 0.8622766067152537

	Validation rec: 0.6413690476190477

	Validation f1: 0.7047063235987047
loss: 


  1%|          | 1/83 [00:20<28:00, 20.49s/it][A

tensor(0.6069, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:40<27:33, 20.42s/it][A

tensor(0.6709, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:06<30:26, 22.83s/it][A

tensor(0.5631, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:34<32:56, 25.02s/it][A

loss: tensor(0.6978, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:02<33:42, 25.93s/it][A

tensor(0.6795, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:29<33:50, 26.36s/it][A

tensor(0.6269, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:56<33:42, 26.61s/it][A

tensor(0.5824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:23<33:29, 26.79s/it][A

tensor(0.5678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:51<33:15, 26.97s/it][A

tensor(0.6616, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:18<32:49, 26.98s/it][A

tensor(0.6306, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:45<32:17, 26.91s/it][A

tensor(0.5757, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:12<31:57, 27.01s/it][A

tensor(0.5299, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:40<31:44, 27.21s/it][A

tensor(0.5019, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:06<31:10, 27.10s/it][A

tensor(0.5472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:33<30:36, 27.01s/it][A

tensor(0.6865, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:00<30:06, 26.96s/it][A

tensor(0.6759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:26<29:29, 26.81s/it][A

tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [07:53<29:03, 26.83s/it][A

tensor(0.5598, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:20<28:30, 26.73s/it][A

tensor(0.5992, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [08:46<28:01, 26.68s/it][A

tensor(0.6660, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:13<27:30, 26.63s/it][A

tensor(0.6023, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [09:40<27:10, 26.73s/it][A

tensor(0.6243, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:06<26:41, 26.68s/it][A

tensor(0.5016, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:33<26:11, 26.63s/it][A

tensor(0.5333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [10:59<25:41, 26.58s/it][A

tensor(0.4621, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:26<25:13, 26.56s/it][A

tensor(0.5346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [11:53<24:52, 26.66s/it][A

tensor(0.5549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:24<25:35, 27.92s/it][A

tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [12:52<25:06, 27.89s/it][A

tensor(0.6229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:19<24:29, 27.73s/it][A

tensor(0.5869, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [13:47<24:01, 27.71s/it][A

tensor(0.5491, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:14<23:26, 27.58s/it][A

tensor(0.6996, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [14:41<22:53, 27.48s/it][A

tensor(0.5237, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:08<22:23, 27.43s/it][A

tensor(0.7441, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:36<21:56, 27.43s/it][A

tensor(0.6633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:03<21:31, 27.49s/it][A

tensor(0.6719, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:31<20:59, 27.39s/it][A

tensor(0.6617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [16:58<20:31, 27.36s/it][A

tensor(0.6032, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:25<20:02, 27.34s/it][A

tensor(0.5806, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [17:53<19:41, 27.48s/it][A

tensor(0.7430, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:20<19:09, 27.37s/it][A

tensor(0.5612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [18:47<18:38, 27.28s/it][A

tensor(0.6800, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:14<18:08, 27.21s/it][A

tensor(0.5866, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [19:42<17:44, 27.30s/it][A

tensor(0.6539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:09<17:12, 27.16s/it][A

tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:36<16:42, 27.10s/it][A

tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:02<16:13, 27.05s/it][A

tensor(0.6683, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:29<15:45, 27.02s/it][A

tensor(0.5604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [21:57<15:21, 27.11s/it][A

tensor(0.6582, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:24<14:53, 27.07s/it][A

tensor(0.5835, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [22:51<14:25, 27.03s/it][A

tensor(0.5253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:18<13:56, 26.99s/it][A

tensor(0.4851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [23:45<13:33, 27.11s/it][A

tensor(0.6137, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:12<13:05, 27.10s/it][A

tensor(0.7181, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [24:39<12:37, 27.05s/it][A

tensor(0.7156, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:06<12:09, 27.03s/it][A

tensor(0.5371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:33<11:42, 27.00s/it][A

tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:00<11:17, 27.12s/it][A

tensor(0.6067, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:27<10:49, 27.07s/it][A

tensor(0.6414, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [26:54<10:22, 27.06s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:21<09:55, 27.07s/it][A

tensor(0.6137, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [27:49<09:30, 27.15s/it][A

tensor(0.5296, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:16<09:02, 27.11s/it][A

tensor(0.4935, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [28:43<08:34, 27.06s/it][A

tensor(0.5451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:10<08:06, 27.03s/it][A

tensor(0.6572, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [29:36<07:38, 27.00s/it][A

tensor(0.6071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:04<07:13, 27.10s/it][A

tensor(0.5177, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:31<06:46, 27.12s/it][A

tensor(0.5346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [30:58<06:19, 27.13s/it][A

tensor(0.6688, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:25<05:52, 27.08s/it][A

tensor(0.5525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [31:52<05:25, 27.15s/it][A

tensor(0.5994, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:19<04:57, 27.08s/it][A

tensor(0.6244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [32:46<04:30, 27.02s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:13<04:02, 26.93s/it][A

tensor(0.5912, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [33:40<03:35, 26.89s/it][A

tensor(0.5368, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:07<03:08, 27.00s/it][A

tensor(0.6119, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [34:34<02:41, 26.96s/it][A

tensor(0.5626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:01<02:14, 26.94s/it][A

tensor(0.5895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:28<01:47, 26.92s/it][A

tensor(0.5000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [35:54<01:20, 26.88s/it][A

tensor(0.5518, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:22<00:53, 26.98s/it][A

tensor(0.6457, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [36:49<00:26, 26.96s/it][A

tensor(0.7171, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [36:58<00:00, 26.73s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4922, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6021623324198895

	train acc: 0.662000273822563

	training prec: 0.8546923916489653

	training rec: 0.662000273822563

	training f1: 0.7188119729437014

	Current Learning rate:  1.8571428571428572e-05



  2%|▏         | 1/42 [00:03<02:17,  3.36s/it][A
  5%|▍         | 2/42 [00:06<02:15,  3.38s/it][A
  7%|▋         | 3/42 [00:10<02:12,  3.40s/it][A
 10%|▉         | 4/42 [00:13<02:08,  3.38s/it][A
 12%|█▏        | 5/42 [00:16<02:05,  3.39s/it][A
 14%|█▍        | 6/42 [00:20<02:02,  3.40s/it][A
 17%|█▋        | 7/42 [00:23<01:58,  3.39s/it][A
 19%|█▉        | 8/42 [00:27<01:55,  3.40s/it][A
 21%|██▏       | 9/42 [00:30<01:52,  3.41s/it][A
 24%|██▍       | 10/42 [00:34<01:49,  3.42s/it][A
 26%|██▌       | 11/42 [00:37<01:46,  3.43s/it][A
 29%|██▊       | 12/42 [00:40<01:43,  3.44s/it][A
 31%|███       | 13/42 [00:44<01:38,  3.41s/it][A
 33%|███▎      | 14/42 [00:47<01:35,  3.41s/it][A
 36%|███▌      | 15/42 [00:51<01:32,  3.42s/it][A
 38%|███▊      | 16/42 [00:54<01:28,  3.41s/it][A
 40%|████      | 17/42 [00:57<01:25,  3.41s/it][A
 43%|████▎     | 18/42 [01:01<01:24,  3.53s/it][A
 45%|████▌     | 19/42 [01:05<01:20,  3.48s/it][A
 48%|████▊     | 20/42 [01:08<01:16,  3


	Validation loss: 0.5809410576309476

	Validation acc: 0.6393849206349207

	Validation prec: 0.8601891909126811

	Validation rec: 0.6393849206349207

	Validation f1: 0.7011881841703789
loss: 


  1%|          | 1/83 [00:27<37:26, 27.39s/it][A

tensor(0.5690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:54<37:05, 27.47s/it][A

tensor(0.6244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:22<36:36, 27.46s/it][A

tensor(0.5285, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:49<36:04, 27.40s/it][A

tensor(0.5532, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:17<35:37, 27.41s/it][A

tensor(0.5911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:44<35:17, 27.50s/it][A

tensor(0.5302, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:12<34:45, 27.45s/it][A

tensor(0.6394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:39<34:14, 27.40s/it][A

tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:06<33:46, 27.39s/it][A

tensor(0.6203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:34<33:17, 27.36s/it][A

tensor(0.5896, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [05:01<32:58, 27.48s/it][A

tensor(0.5856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:29<32:26, 27.41s/it][A

tensor(0.5097, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:56<31:58, 27.41s/it][A

tensor(0.5472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:23<31:30, 27.40s/it][A

tensor(0.5059, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:51<31:13, 27.55s/it][A

tensor(0.5236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:18<30:38, 27.44s/it][A

tensor(0.5841, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:45<29:59, 27.27s/it][A

tensor(0.7250, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:12<29:24, 27.15s/it][A

tensor(0.6233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:39<28:51, 27.06s/it][A

tensor(0.6140, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:06<28:29, 27.13s/it][A

tensor(0.6263, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:33<27:56, 27.03s/it][A

tensor(0.6373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [10:00<27:26, 26.99s/it][A

tensor(0.5778, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:27<26:57, 26.96s/it][A

tensor(0.5569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:54<26:36, 27.06s/it][A

tensor(0.6209, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:21<26:06, 27.01s/it][A

tensor(0.6811, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:48<25:37, 26.97s/it][A

tensor(0.5884, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:15<25:10, 26.98s/it][A

tensor(0.5139, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:42<24:40, 26.93s/it][A

tensor(0.5686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:09<24:20, 27.05s/it][A

tensor(0.6271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:36<23:52, 27.02s/it][A

tensor(0.6332, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:03<23:22, 26.97s/it][A

tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:30<22:54, 26.95s/it][A

tensor(0.6194, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [14:57<22:34, 27.09s/it][A

tensor(0.6404, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:24<22:01, 26.97s/it][A

tensor(0.6916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:51<21:33, 26.95s/it][A

tensor(0.5442, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:18<21:07, 26.96s/it][A

tensor(0.7272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:45<20:40, 26.96s/it][A

tensor(0.6878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:12<20:20, 27.12s/it][A

tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:39<19:52, 27.10s/it][A

tensor(0.6636, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:06<19:24, 27.07s/it][A

tensor(0.6581, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:33<18:56, 27.07s/it][A

tensor(0.6530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:01<18:32, 27.13s/it][A

tensor(0.6502, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:28<18:02, 27.06s/it][A

tensor(0.5177, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [19:54<17:33, 27.01s/it][A

tensor(0.6054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:21<17:05, 26.98s/it][A

tensor(0.6588, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:48<16:38, 26.98s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:16<16:15, 27.09s/it][A

tensor(0.7012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:43<15:47, 27.06s/it][A

tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:10<15:18, 27.02s/it][A

tensor(0.7046, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:37<14:50, 26.98s/it][A

tensor(0.5382, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:04<14:26, 27.07s/it][A

tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:31<13:58, 27.04s/it][A

tensor(0.6035, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [23:58<13:30, 27.01s/it][A

tensor(0.5650, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:25<13:02, 26.97s/it][A

tensor(0.5205, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [24:51<12:33, 26.93s/it][A

tensor(0.6199, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:19<12:10, 27.04s/it][A

tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:46<11:42, 27.02s/it][A

tensor(0.6344, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:13<11:14, 26.99s/it][A

tensor(0.5434, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:39<10:46, 26.95s/it][A

tensor(0.5207, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:07<10:22, 27.07s/it][A

tensor(0.5158, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:34<09:54, 27.04s/it][A

tensor(0.5128, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:01<09:27, 27.02s/it][A

tensor(0.6810, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:28<08:59, 26.98s/it][A

tensor(0.5451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [28:54<08:31, 26.94s/it][A

tensor(0.5387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:22<08:06, 27.04s/it][A

tensor(0.5386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [29:49<07:38, 27.00s/it][A

tensor(0.6036, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:16<07:11, 26.96s/it][A

tensor(0.5216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:42<06:43, 26.93s/it][A

tensor(0.6031, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:09<06:16, 26.91s/it][A

tensor(0.5643, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:36<05:50, 26.98s/it][A

tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:03<05:23, 26.98s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:31<04:57, 27.02s/it][A

tensor(0.6703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [32:58<04:30, 27.05s/it][A

tensor(0.6259, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:25<04:04, 27.20s/it][A

tensor(0.6814, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [33:52<03:37, 27.18s/it][A

tensor(0.5116, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:19<03:10, 27.15s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [34:47<02:42, 27.17s/it][A

tensor(0.5996, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:14<02:15, 27.12s/it][A

tensor(0.5852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:41<01:48, 27.17s/it][A

tensor(0.5748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:08<01:21, 27.10s/it][A

tensor(0.5694, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:35<00:54, 27.06s/it][A

tensor(0.5587, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:02<00:27, 27.04s/it][A

tensor(0.4593, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:11<00:00, 26.89s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5763, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5983927113464079

	train acc: 0.6497980558598028

	training prec: 0.8577143603013393

	training rec: 0.6497980558598028

	training f1: 0.7107713042834062

	Current Learning rate:  1.7142857142857145e-05



  2%|▏         | 1/42 [00:03<02:21,  3.45s/it][A
  5%|▍         | 2/42 [00:07<02:25,  3.64s/it][A
  7%|▋         | 3/42 [00:10<02:18,  3.55s/it][A
 10%|▉         | 4/42 [00:14<02:13,  3.50s/it][A
 12%|█▏        | 5/42 [00:17<02:08,  3.47s/it][A
 14%|█▍        | 6/42 [00:20<02:04,  3.45s/it][A
 17%|█▋        | 7/42 [00:24<02:00,  3.44s/it][A
 19%|█▉        | 8/42 [00:27<01:56,  3.43s/it][A
 21%|██▏       | 9/42 [00:31<01:53,  3.43s/it][A
 24%|██▍       | 10/42 [00:34<01:49,  3.43s/it][A
 26%|██▌       | 11/42 [00:38<01:46,  3.42s/it][A
 29%|██▊       | 12/42 [00:41<01:42,  3.42s/it][A
 31%|███       | 13/42 [00:44<01:39,  3.42s/it][A
 33%|███▎      | 14/42 [00:48<01:35,  3.41s/it][A
 36%|███▌      | 15/42 [00:51<01:32,  3.41s/it][A
 38%|███▊      | 16/42 [00:55<01:28,  3.42s/it][A
 40%|████      | 17/42 [00:58<01:25,  3.40s/it][A
 43%|████▎     | 18/42 [01:01<01:21,  3.41s/it][A
 45%|████▌     | 19/42 [01:05<01:18,  3.41s/it][A
 48%|████▊     | 20/42 [01:08<01:14,  3


	Validation loss: 0.5818793723980585

	Validation acc: 0.6431051587301587

	Validation prec: 0.86297089559508

	Validation rec: 0.6431051587301587

	Validation f1: 0.7065185823565701
loss: 


  1%|          | 1/83 [00:27<37:01, 27.09s/it][A

tensor(0.5474, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:54<36:34, 27.09s/it][A

tensor(0.6355, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:21<36:05, 27.07s/it][A

tensor(0.5786, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:48<35:49, 27.21s/it][A

tensor(0.6185, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:15<35:25, 27.25s/it][A

tensor(0.5706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:43<34:57, 27.24s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:10<34:23, 27.16s/it][A

tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:37<33:53, 27.12s/it][A

tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:04<33:34, 27.23s/it][A

tensor(0.5416, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:31<33:04, 27.18s/it][A

tensor(0.7175, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:58<32:33, 27.13s/it][A

tensor(0.6688, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:25<32:04, 27.10s/it][A

tensor(0.5677, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:53<31:44, 27.20s/it][A

tensor(0.6041, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:20<31:15, 27.19s/it][A

tensor(0.5423, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:47<30:46, 27.16s/it][A

tensor(0.5624, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:14<30:18, 27.14s/it][A

tensor(0.8201, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:41<29:49, 27.11s/it][A

tensor(0.6388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:09<29:28, 27.21s/it][A

tensor(0.6967, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:36<28:59, 27.17s/it][A

tensor(0.5384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:03<28:30, 27.15s/it][A

tensor(0.4994, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:30<28:01, 27.12s/it][A

tensor(0.6095, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [09:57<27:41, 27.23s/it][A

tensor(0.5761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:24<27:10, 27.18s/it][A

tensor(0.5209, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:51<26:41, 27.15s/it][A

tensor(0.5667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:19<26:14, 27.14s/it][A

tensor(0.6544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:46<25:46, 27.13s/it][A

tensor(0.5483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:13<25:24, 27.23s/it][A

tensor(0.4611, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:40<24:54, 27.17s/it][A

tensor(0.6220, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:07<24:24, 27.12s/it][A

tensor(0.6588, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:34<23:55, 27.08s/it][A

tensor(0.5713, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:02<23:34, 27.21s/it][A

tensor(0.5418, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:29<23:04, 27.15s/it][A

tensor(0.6910, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [14:56<22:35, 27.12s/it][A

tensor(0.6840, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:23<22:07, 27.10s/it][A

tensor(0.6450, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:50<21:40, 27.09s/it][A

tensor(0.5092, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:17<21:17, 27.17s/it][A

tensor(0.5187, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:44<20:48, 27.15s/it][A

tensor(0.5643, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:11<20:19, 27.11s/it][A

tensor(0.7063, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:38<19:49, 27.04s/it][A

tensor(0.5540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:05<19:25, 27.11s/it][A

tensor(0.5828, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:32<18:56, 27.05s/it][A

tensor(0.5498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [18:59<18:26, 26.98s/it][A

tensor(0.5525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:26<17:57, 26.93s/it][A

tensor(0.5532, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [19:53<17:28, 26.89s/it][A

tensor(0.5848, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:20<17:05, 26.99s/it][A

tensor(0.6463, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:47<16:39, 27.00s/it][A

tensor(0.5246, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:14<16:12, 27.02s/it][A

tensor(0.5881, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:41<15:46, 27.03s/it][A

tensor(0.5627, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:08<15:21, 27.12s/it][A

tensor(0.5587, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:36<14:54, 27.09s/it][A

tensor(0.7451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:03<14:26, 27.08s/it][A

tensor(0.6139, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:30<13:59, 27.07s/it][A

tensor(0.6417, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [23:57<13:31, 27.05s/it][A

tensor(0.6676, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:24<13:07, 27.15s/it][A

tensor(0.6115, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [24:51<12:39, 27.12s/it][A

tensor(0.5873, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:18<12:11, 27.09s/it][A

tensor(0.6591, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:45<11:43, 27.05s/it][A

tensor(0.6304, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:12<11:16, 27.04s/it][A

tensor(0.6080, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:39<10:49, 27.08s/it][A

tensor(0.5696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:06<10:22, 27.05s/it][A

tensor(0.6841, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:33<09:54, 27.02s/it][A

tensor(0.6014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:00<09:27, 27.02s/it][A

tensor(0.6614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:28<09:03, 27.15s/it][A

tensor(0.7020, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [28:55<08:34, 27.09s/it][A

tensor(0.6513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:22<08:06, 27.06s/it][A

tensor(0.5534, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [29:48<07:38, 27.00s/it][A

tensor(0.5971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:15<07:11, 26.94s/it][A

tensor(0.5719, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:42<06:45, 27.04s/it][A

tensor(0.5713, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:09<06:17, 26.99s/it][A

tensor(0.6723, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:36<05:50, 26.94s/it][A

tensor(0.5364, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:03<05:22, 26.90s/it][A

tensor(0.7266, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:30<04:57, 27.06s/it][A

tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [32:57<04:29, 26.99s/it][A

tensor(0.6701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:24<04:02, 26.97s/it][A

tensor(0.4928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [33:51<03:35, 26.91s/it][A

tensor(0.5324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:18<03:08, 26.89s/it][A

tensor(0.7939, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [34:45<02:41, 27.00s/it][A

tensor(0.5682, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:12<02:14, 26.96s/it][A

tensor(0.6097, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:39<01:47, 26.93s/it][A

tensor(0.6127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:06<01:20, 26.91s/it][A

tensor(0.6725, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:33<00:54, 27.04s/it][A

tensor(0.5219, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:00<00:26, 26.99s/it][A

tensor(0.5221, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:09<00:00, 26.86s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7226, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6057591054094843

	train acc: 0.661657995618839

	training prec: 0.8501039349852417

	training rec: 0.661657995618839

	training f1: 0.7202275564529103

	Current Learning rate:  1.5714285714285715e-05



  2%|▏         | 1/42 [00:03<02:20,  3.43s/it][A
  5%|▍         | 2/42 [00:06<02:17,  3.43s/it][A
  7%|▋         | 3/42 [00:10<02:12,  3.40s/it][A
 10%|▉         | 4/42 [00:13<02:09,  3.41s/it][A
 12%|█▏        | 5/42 [00:17<02:06,  3.42s/it][A
 14%|█▍        | 6/42 [00:20<02:02,  3.40s/it][A
 17%|█▋        | 7/42 [00:23<01:59,  3.41s/it][A
 19%|█▉        | 8/42 [00:27<01:56,  3.42s/it][A
 21%|██▏       | 9/42 [00:30<01:52,  3.40s/it][A
 24%|██▍       | 10/42 [00:34<01:49,  3.41s/it][A
 26%|██▌       | 11/42 [00:37<01:45,  3.42s/it][A
 29%|██▊       | 12/42 [00:40<01:41,  3.39s/it][A
 31%|███       | 13/42 [00:44<01:38,  3.40s/it][A
 33%|███▎      | 14/42 [00:47<01:35,  3.41s/it][A
 36%|███▌      | 15/42 [00:51<01:31,  3.40s/it][A
 38%|███▊      | 16/42 [00:54<01:28,  3.40s/it][A
 40%|████      | 17/42 [00:57<01:25,  3.42s/it][A
 43%|████▎     | 18/42 [01:01<01:21,  3.40s/it][A
 45%|████▌     | 19/42 [01:04<01:18,  3.41s/it][A
 48%|████▊     | 20/42 [01:08<01:15,  3


	Validation loss: 0.5812446524699529

	Validation acc: 0.646329365079365

	Validation prec: 0.8608671772912747

	Validation rec: 0.646329365079365

	Validation f1: 0.708485560083938
loss: 


  1%|          | 1/83 [00:27<36:57, 27.04s/it][A

tensor(0.5799, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:54<36:28, 27.02s/it][A

tensor(0.5288, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:21<36:17, 27.22s/it][A

tensor(0.5555, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:48<35:43, 27.13s/it][A

tensor(0.5564, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:15<35:08, 27.04s/it][A

tensor(0.5690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:42<34:40, 27.02s/it][A

tensor(0.5949, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:09<34:25, 27.18s/it][A

tensor(0.6647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:36<33:53, 27.11s/it][A

tensor(0.6330, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:03<33:21, 27.05s/it][A

tensor(0.7177, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:30<32:51, 27.01s/it][A

tensor(0.7694, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:57<32:24, 27.01s/it][A

tensor(0.6648, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:24<32:04, 27.10s/it][A

tensor(0.5249, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:51<31:33, 27.04s/it][A

tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:18<31:02, 26.99s/it][A

tensor(0.5991, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:45<30:33, 26.96s/it][A

tensor(0.5918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:13<30:16, 27.11s/it][A

tensor(0.6255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:40<29:45, 27.05s/it][A

tensor(0.6349, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:06<29:14, 27.00s/it][A

tensor(0.6476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:33<28:49, 27.02s/it][A

tensor(0.5542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:01<28:23, 27.04s/it][A

tensor(0.5976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:28<28:01, 27.13s/it][A

tensor(0.5272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [09:55<27:30, 27.06s/it][A

tensor(0.7298, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:22<27:01, 27.03s/it][A

tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:49<26:32, 26.99s/it][A

tensor(0.5905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:16<26:11, 27.10s/it][A

tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:43<25:40, 27.03s/it][A

tensor(0.5313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:10<25:11, 26.99s/it][A

tensor(0.5705, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:37<24:42, 26.96s/it][A

tensor(0.6672, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:04<24:14, 26.94s/it][A

tensor(0.5895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:31<23:53, 27.05s/it][A

tensor(0.5949, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [13:58<23:26, 27.05s/it][A

tensor(0.5852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:25<22:58, 27.03s/it][A

tensor(0.6819, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [14:52<22:29, 26.99s/it][A

tensor(0.6776, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:19<22:07, 27.09s/it][A

tensor(0.5878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:46<21:40, 27.10s/it][A

tensor(0.5504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:13<21:15, 27.13s/it][A

tensor(0.5676, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:40<20:45, 27.09s/it][A

tensor(0.5524, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:07<20:17, 27.06s/it][A

tensor(0.6017, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:35<19:55, 27.18s/it][A

tensor(0.6317, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:02<19:26, 27.14s/it][A

tensor(0.5908, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:29<19:01, 27.19s/it][A

tensor(0.5715, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [18:57<18:37, 27.24s/it][A

tensor(0.7498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:24<18:15, 27.39s/it][A

tensor(0.5270, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [19:52<17:47, 27.37s/it][A

tensor(0.5785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:19<17:19, 27.37s/it][A

tensor(0.4858, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:46<16:52, 27.37s/it][A

tensor(0.5583, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:14<16:24, 27.35s/it][A

tensor(0.4764, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:41<15:59, 27.40s/it][A

tensor(0.5149, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:08<15:26, 27.26s/it][A

tensor(0.4787, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:35<14:56, 27.16s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:02<14:28, 27.13s/it][A

tensor(0.5532, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:29<14:03, 27.20s/it][A

tensor(0.5267, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [23:56<13:33, 27.13s/it][A

tensor(0.5250, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:23<13:05, 27.07s/it][A

tensor(0.5823, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [24:50<12:37, 27.05s/it][A

tensor(0.5050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:17<12:09, 27.03s/it][A

tensor(0.8083, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:45<11:46, 27.18s/it][A

tensor(0.6020, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:12<11:17, 27.11s/it][A

tensor(0.5481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:39<10:48, 27.04s/it][A

tensor(0.5888, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:06<10:21, 27.03s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:33<09:58, 27.19s/it][A

tensor(0.5929, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:01<09:31, 27.20s/it][A

tensor(0.5271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:28<09:03, 27.15s/it][A

tensor(0.6051, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [28:54<08:34, 27.08s/it][A

tensor(0.4963, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:21<08:06, 27.03s/it][A

tensor(0.6373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [29:49<07:41, 27.14s/it][A

tensor(0.5966, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:16<07:13, 27.12s/it][A

tensor(0.6203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:43<06:45, 27.06s/it][A

tensor(0.6332, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:10<06:18, 27.04s/it][A

tensor(0.6403, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:37<05:53, 27.18s/it][A

tensor(0.5483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:04<05:25, 27.16s/it][A

tensor(0.5204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:32<04:58, 27.17s/it][A

tensor(0.6715, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [32:59<04:31, 27.17s/it][A

tensor(0.6714, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:26<04:04, 27.18s/it][A

tensor(0.5619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [33:54<03:38, 27.31s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:21<03:10, 27.26s/it][A

tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [34:48<02:43, 27.27s/it][A

tensor(0.6461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:15<02:16, 27.27s/it][A

tensor(0.5348, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:43<01:49, 27.44s/it][A

tensor(0.5039, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:10<01:22, 27.39s/it][A

tensor(0.5233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:38<00:54, 27.38s/it][A

tensor(0.6403, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:05<00:27, 27.35s/it][A

tensor(0.5560, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:15<00:00, 26.93s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5249, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5926659311874803

	train acc: 0.6493702081051479

	training prec: 0.8571712146146024

	training rec: 0.6493702081051479

	training f1: 0.7095283628264851

	Current Learning rate:  1.4285714285714285e-05



  2%|▏         | 1/42 [00:03<02:21,  3.45s/it][A
  5%|▍         | 2/42 [00:06<02:18,  3.46s/it][A
  7%|▋         | 3/42 [00:10<02:15,  3.47s/it][A
 10%|▉         | 4/42 [00:13<02:10,  3.45s/it][A
 12%|█▏        | 5/42 [00:17<02:07,  3.45s/it][A
 14%|█▍        | 6/42 [00:20<02:07,  3.53s/it][A
 17%|█▋        | 7/42 [00:24<02:04,  3.57s/it][A
 19%|█▉        | 8/42 [00:28<02:00,  3.54s/it][A
 21%|██▏       | 9/42 [00:31<01:56,  3.52s/it][A
 24%|██▍       | 10/42 [00:34<01:51,  3.49s/it][A
 26%|██▌       | 11/42 [00:38<01:47,  3.48s/it][A
 29%|██▊       | 12/42 [00:41<01:44,  3.47s/it][A
 31%|███       | 13/42 [00:45<01:40,  3.46s/it][A
 33%|███▎      | 14/42 [00:48<01:36,  3.45s/it][A
 36%|███▌      | 15/42 [00:52<01:33,  3.46s/it][A
 38%|███▊      | 16/42 [00:55<01:29,  3.45s/it][A
 40%|████      | 17/42 [00:59<01:26,  3.45s/it][A
 43%|████▎     | 18/42 [01:02<01:23,  3.46s/it][A
 45%|████▌     | 19/42 [01:06<01:19,  3.45s/it][A
 48%|████▊     | 20/42 [01:09<01:16,  3


	Validation loss: 0.5845490466980707

	Validation acc: 0.6408730158730159

	Validation prec: 0.864815702212647

	Validation rec: 0.6408730158730159

	Validation f1: 0.707142836165987
loss: 


  1%|          | 1/83 [00:27<37:52, 27.71s/it][A

tensor(0.7180, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:55<37:10, 27.54s/it][A

tensor(0.6745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:22<36:34, 27.43s/it][A

tensor(0.6063, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:49<36:03, 27.39s/it][A

tensor(0.5524, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:17<35:45, 27.51s/it][A

tensor(0.6373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:44<35:16, 27.48s/it][A

tensor(0.6417, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:12<34:44, 27.43s/it][A

tensor(0.6425, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:39<34:14, 27.39s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:07<33:56, 27.52s/it][A

tensor(0.5829, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:34<33:23, 27.45s/it][A

tensor(0.6548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [05:02<32:54, 27.43s/it][A

tensor(0.8242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:29<32:24, 27.39s/it][A

tensor(0.5505, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:56<31:55, 27.37s/it][A

tensor(0.5623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:24<31:38, 27.51s/it][A

tensor(0.5648, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:51<31:09, 27.49s/it][A

tensor(0.6724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:19<30:37, 27.43s/it][A

tensor(0.7204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:46<30:08, 27.40s/it][A

tensor(0.4954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:14<29:49, 27.53s/it][A

tensor(0.6936, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:41<29:18, 27.47s/it][A

tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:08<28:46, 27.40s/it][A

tensor(0.5670, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:36<28:17, 27.38s/it][A

tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [10:03<27:48, 27.35s/it][A

tensor(0.5112, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:31<27:30, 27.50s/it][A

tensor(0.6028, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:58<26:58, 27.44s/it][A

tensor(0.6079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:26<26:29, 27.41s/it][A

tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:53<26:00, 27.37s/it][A

tensor(0.6337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:21<25:40, 27.50s/it][A

tensor(0.6511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:48<25:09, 27.44s/it][A

tensor(0.5073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:15<24:39, 27.41s/it][A

tensor(0.6839, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:43<24:11, 27.38s/it][A

tensor(0.6140, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:10<23:43, 27.38s/it][A

tensor(0.5637, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:38<23:22, 27.50s/it][A

tensor(0.6479, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [15:05<22:53, 27.46s/it][A

tensor(0.5189, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:32<22:23, 27.41s/it][A

tensor(0.5386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [16:00<21:54, 27.39s/it][A

tensor(0.5143, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:27<21:31, 27.47s/it][A

tensor(0.5604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:55<21:02, 27.45s/it][A

tensor(0.6966, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:22<20:33, 27.41s/it][A

tensor(0.5217, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:49<20:04, 27.37s/it][A

tensor(0.5798, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:17<19:36, 27.35s/it][A

tensor(0.5973, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:45<19:14, 27.49s/it][A

tensor(0.5911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:12<18:44, 27.43s/it][A

tensor(0.7159, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:39<18:15, 27.39s/it][A

tensor(0.6921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [20:06<17:47, 27.36s/it][A

tensor(0.7415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:34<17:24, 27.48s/it][A

tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [21:02<16:55, 27.44s/it][A

tensor(0.4660, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:29<16:26, 27.39s/it][A

tensor(0.4887, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:56<15:59, 27.40s/it][A

tensor(0.7440, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:24<15:31, 27.38s/it][A

tensor(0.6707, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:51<15:07, 27.50s/it][A

tensor(0.5809, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:19<14:38, 27.45s/it][A

tensor(0.7452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:46<14:10, 27.44s/it][A

tensor(0.7527, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:13<13:41, 27.39s/it][A

tensor(0.6143, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:41<13:17, 27.49s/it][A

tensor(0.4765, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [25:08<12:48, 27.46s/it][A

tensor(0.5961, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:36<12:20, 27.42s/it][A

tensor(0.5533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [26:03<11:51, 27.37s/it][A

tensor(0.5708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:31<11:27, 27.48s/it][A

tensor(0.5810, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:58<10:57, 27.42s/it][A

tensor(0.7457, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:25<10:29, 27.39s/it][A

tensor(0.5434, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:53<10:02, 27.38s/it][A

tensor(0.5770, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:20<09:35, 27.40s/it][A

tensor(0.4983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:48<09:10, 27.51s/it][A

tensor(0.5792, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [29:15<08:41, 27.47s/it][A

tensor(0.5408, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:43<08:13, 27.42s/it][A

tensor(0.5696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [30:10<07:45, 27.40s/it][A

tensor(0.5293, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:38<07:19, 27.49s/it][A

tensor(0.5864, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [31:05<06:51, 27.45s/it][A

tensor(0.5750, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:32<06:23, 27.40s/it][A

tensor(0.6321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [32:00<05:55, 27.37s/it][A

tensor(0.6685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:27<05:28, 27.35s/it][A

tensor(0.6324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:55<05:02, 27.49s/it][A

tensor(0.5697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:22<04:34, 27.44s/it][A

tensor(0.7575, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:49<04:06, 27.43s/it][A

tensor(0.6043, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [34:17<03:39, 27.38s/it][A

tensor(0.6244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:45<03:12, 27.49s/it][A

tensor(0.5661, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [35:12<02:44, 27.48s/it][A

tensor(0.5543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:39<02:17, 27.42s/it][A

tensor(0.5255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [36:07<01:49, 27.43s/it][A

tensor(0.6376, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:34<01:22, 27.43s/it][A

tensor(0.7138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [37:02<00:55, 27.53s/it][A

tensor(0.7174, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:29<00:27, 27.49s/it][A

tensor(0.5099, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:39<00:00, 27.22s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5945, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6113887253295944

	train acc: 0.6586117196056955

	training prec: 0.8463604487747223

	training rec: 0.6586117196056955

	training f1: 0.7168137742853851

	Current Learning rate:  1.2857142857142857e-05



  2%|▏         | 1/42 [00:03<02:24,  3.52s/it][A
  5%|▍         | 2/42 [00:06<02:18,  3.47s/it][A
  7%|▋         | 3/42 [00:10<02:15,  3.47s/it][A
 10%|▉         | 4/42 [00:13<02:11,  3.47s/it][A
 12%|█▏        | 5/42 [00:17<02:07,  3.45s/it][A
 14%|█▍        | 6/42 [00:20<02:04,  3.45s/it][A
 17%|█▋        | 7/42 [00:24<02:01,  3.47s/it][A
 19%|█▉        | 8/42 [00:27<01:57,  3.46s/it][A
 21%|██▏       | 9/42 [00:31<01:53,  3.45s/it][A
 24%|██▍       | 10/42 [00:34<01:50,  3.46s/it][A
 26%|██▌       | 11/42 [00:38<01:46,  3.45s/it][A
 29%|██▊       | 12/42 [00:41<01:43,  3.45s/it][A
 31%|███       | 13/42 [00:44<01:40,  3.45s/it][A
 33%|███▎      | 14/42 [00:48<01:36,  3.44s/it][A
 36%|███▌      | 15/42 [00:51<01:33,  3.45s/it][A
 38%|███▊      | 16/42 [00:55<01:29,  3.46s/it][A
 40%|████      | 17/42 [00:58<01:25,  3.44s/it][A
 43%|████▎     | 18/42 [01:02<01:22,  3.45s/it][A
 45%|████▌     | 19/42 [01:06<01:23,  3.62s/it][A
 48%|████▊     | 20/42 [01:09<01:18,  3


	Validation loss: 0.5874368044592085

	Validation acc: 0.6222718253968255

	Validation prec: 0.8654722342154626

	Validation rec: 0.6222718253968255

	Validation f1: 0.6880778159284995
loss: 


  1%|          | 1/83 [00:27<37:22, 27.34s/it][A

tensor(0.5033, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:55<37:15, 27.60s/it][A

tensor(0.6684, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:22<36:38, 27.48s/it][A

tensor(0.7180, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:49<36:08, 27.45s/it][A

tensor(0.5961, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:17<35:38, 27.41s/it][A

tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:44<35:10, 27.41s/it][A

tensor(0.5114, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:12<34:51, 27.52s/it][A

tensor(0.7195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:39<34:20, 27.48s/it][A

tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:07<33:53, 27.48s/it][A

tensor(0.5903, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:34<33:26, 27.49s/it][A

tensor(0.5039, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [05:02<33:07, 27.60s/it][A

tensor(0.6118, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:30<32:38, 27.58s/it][A

tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:57<32:07, 27.54s/it][A

tensor(0.5706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:24<31:35, 27.47s/it][A

tensor(0.5610, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:52<31:09, 27.49s/it][A

tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:19<30:35, 27.39s/it][A

tensor(0.6312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:46<30:04, 27.34s/it][A

tensor(0.5808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:14<29:35, 27.31s/it][A

tensor(0.5722, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:41<29:04, 27.25s/it][A

tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:08<28:45, 27.38s/it][A

tensor(0.7144, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:36<28:13, 27.32s/it][A

tensor(0.5809, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [10:03<27:43, 27.28s/it][A

tensor(0.5167, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:30<27:13, 27.22s/it][A

tensor(0.5349, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:57<26:53, 27.35s/it][A

tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:25<26:24, 27.32s/it][A

tensor(0.6844, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:52<25:54, 27.27s/it][A

tensor(0.6269, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:19<25:25, 27.25s/it][A

tensor(0.5331, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:46<24:58, 27.25s/it][A

tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:14<24:39, 27.39s/it][A

tensor(0.6530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:41<24:08, 27.33s/it][A

tensor(0.6517, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:08<23:39, 27.30s/it][A

tensor(0.8202, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:36<23:12, 27.31s/it][A

tensor(0.5937, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [15:03<22:49, 27.40s/it][A

tensor(0.6395, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:30<22:18, 27.32s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:58<21:48, 27.27s/it][A

tensor(0.5784, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:25<21:21, 27.26s/it][A

tensor(0.5965, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:52<20:54, 27.27s/it][A

tensor(0.5131, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:20<20:32, 27.39s/it][A

tensor(0.5975, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:47<20:03, 27.35s/it][A

tensor(0.5369, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:14<19:30, 27.23s/it][A

tensor(0.5457, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:41<19:00, 27.16s/it][A

tensor(0.5702, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:08<18:36, 27.23s/it][A

tensor(0.5660, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:35<18:04, 27.12s/it][A

tensor(0.5560, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [20:02<17:36, 27.09s/it][A

tensor(0.6689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:30<17:11, 27.13s/it][A

tensor(0.6061, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:57<16:43, 27.13s/it][A

tensor(0.5389, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:24<16:22, 27.29s/it][A

tensor(0.6284, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:51<15:53, 27.23s/it][A

tensor(0.5176, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:18<15:24, 27.18s/it][A

tensor(0.5823, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:46<14:55, 27.14s/it][A

tensor(0.6522, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:13<14:33, 27.29s/it][A

tensor(0.7382, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:40<14:04, 27.24s/it][A

tensor(0.6353, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:07<13:36, 27.21s/it][A

tensor(0.5758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:34<13:06, 27.13s/it][A

tensor(0.6836, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [25:01<12:39, 27.12s/it][A

tensor(0.6779, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:29<12:15, 27.23s/it][A

tensor(0.7083, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:56<11:46, 27.18s/it][A

tensor(0.5037, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:23<11:18, 27.13s/it][A

tensor(0.5687, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:50<10:50, 27.09s/it][A

tensor(0.5515, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:18<10:26, 27.24s/it][A

tensor(0.6260, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:45<09:58, 27.19s/it][A

tensor(0.5553, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:12<09:30, 27.15s/it][A

tensor(0.5430, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:39<09:02, 27.13s/it][A

tensor(0.6444, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [29:06<08:35, 27.11s/it][A

tensor(0.6698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:33<08:10, 27.22s/it][A

tensor(0.5495, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [30:00<07:42, 27.18s/it][A

tensor(0.6890, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:28<07:14, 27.15s/it][A

tensor(0.5052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:55<06:46, 27.13s/it][A

tensor(0.6337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:22<06:21, 27.28s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:49<05:53, 27.22s/it][A

tensor(0.6384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:16<05:26, 27.18s/it][A

tensor(0.6147, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:44<04:58, 27.17s/it][A

tensor(0.6433, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:11<04:31, 27.15s/it][A

tensor(0.5559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:38<04:05, 27.27s/it][A

tensor(0.5089, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [34:05<03:37, 27.22s/it][A

tensor(0.4915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:32<03:10, 27.17s/it][A

tensor(0.5351, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [34:59<02:42, 27.15s/it][A

tensor(0.5401, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:27<02:16, 27.30s/it][A

tensor(0.5812, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:54<01:48, 27.23s/it][A

tensor(0.5697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:21<01:21, 27.19s/it][A

tensor(0.5278, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:48<00:54, 27.15s/it][A

tensor(0.5495, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:15<00:27, 27.13s/it][A

tensor(0.5904, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:25<00:00, 27.05s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6030, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5989520944744707

	train acc: 0.6604600219058051

	training prec: 0.8570391084669136

	training rec: 0.6604600219058051

	training f1: 0.7187228341626605

	Current Learning rate:  1.1428571428571429e-05



  2%|▏         | 1/42 [00:03<02:36,  3.81s/it][A
  5%|▍         | 2/42 [00:07<02:23,  3.58s/it][A
  7%|▋         | 3/42 [00:10<02:15,  3.49s/it][A
 10%|▉         | 4/42 [00:14<02:11,  3.46s/it][A
 12%|█▏        | 5/42 [00:17<02:07,  3.45s/it][A
 14%|█▍        | 6/42 [00:20<02:03,  3.44s/it][A
 17%|█▋        | 7/42 [00:24<02:00,  3.44s/it][A
 19%|█▉        | 8/42 [00:27<01:56,  3.43s/it][A
 21%|██▏       | 9/42 [00:31<01:52,  3.42s/it][A
 24%|██▍       | 10/42 [00:34<01:49,  3.42s/it][A
 26%|██▌       | 11/42 [00:37<01:46,  3.43s/it][A
 29%|██▊       | 12/42 [00:41<01:42,  3.41s/it][A
 31%|███       | 13/42 [00:44<01:39,  3.42s/it][A
 33%|███▎      | 14/42 [00:48<01:35,  3.43s/it][A
 36%|███▌      | 15/42 [00:51<01:31,  3.41s/it][A
 38%|███▊      | 16/42 [00:55<01:28,  3.41s/it][A
 40%|████      | 17/42 [00:58<01:25,  3.43s/it][A
 43%|████▎     | 18/42 [01:01<01:21,  3.42s/it][A
 45%|████▌     | 19/42 [01:05<01:18,  3.42s/it][A
 48%|████▊     | 20/42 [01:08<01:15,  3


	Validation loss: 0.5869714660303933

	Validation acc: 0.6398809523809523

	Validation prec: 0.8612169760538754

	Validation rec: 0.6398809523809523

	Validation f1: 0.7041621398485023
loss: 


  1%|          | 1/83 [00:27<37:01, 27.09s/it][A

tensor(0.5766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:54<36:30, 27.04s/it][A

tensor(0.6078, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:21<36:04, 27.06s/it][A

tensor(0.6446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:48<35:52, 27.25s/it][A

tensor(0.4729, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:15<35:22, 27.21s/it][A

tensor(0.5789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:42<34:49, 27.13s/it][A

tensor(0.5016, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:09<34:18, 27.09s/it][A

tensor(0.6133, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:36<33:48, 27.04s/it][A

tensor(0.6533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:04<33:31, 27.18s/it][A

tensor(0.8221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:31<33:00, 27.13s/it][A

tensor(0.7051, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:58<32:29, 27.07s/it][A

tensor(0.5905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:25<31:57, 27.01s/it][A

tensor(0.5998, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:52<31:42, 27.17s/it][A

tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:19<31:14, 27.16s/it][A

tensor(0.5526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:46<30:46, 27.16s/it][A

tensor(0.5942, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:14<30:20, 27.17s/it][A

tensor(0.5838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:41<29:51, 27.15s/it][A

tensor(0.5229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:08<29:33, 27.28s/it][A

tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:35<29:03, 27.24s/it][A

tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:03<28:33, 27.21s/it][A

tensor(0.6721, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:30<28:03, 27.16s/it][A

tensor(0.6103, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [09:57<27:45, 27.31s/it][A

tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:24<27:14, 27.25s/it][A

tensor(0.5224, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:51<26:44, 27.19s/it][A

tensor(0.7307, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:19<26:15, 27.17s/it][A

tensor(0.7025, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:46<25:46, 27.14s/it][A

tensor(0.5855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:13<25:25, 27.23s/it][A

tensor(0.4794, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:40<24:55, 27.18s/it][A

tensor(0.6079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:07<24:25, 27.14s/it][A

tensor(0.5584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:34<23:57, 27.12s/it][A

tensor(0.6126, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:02<23:38, 27.28s/it][A

tensor(0.6542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:29<23:07, 27.21s/it][A

tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [14:56<22:38, 27.18s/it][A

tensor(0.6223, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:23<22:10, 27.16s/it][A

tensor(0.6054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:50<21:42, 27.15s/it][A

tensor(0.5834, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:18<21:22, 27.29s/it][A

tensor(0.6202, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:45<20:53, 27.25s/it][A

tensor(0.5926, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:12<20:25, 27.23s/it][A

tensor(0.5332, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:39<19:56, 27.19s/it][A

tensor(0.5711, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:07<19:36, 27.37s/it][A

tensor(0.6515, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:34<19:06, 27.29s/it][A

tensor(0.5924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:01<18:34, 27.18s/it][A

tensor(0.6724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:28<18:05, 27.13s/it][A

tensor(0.5720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [19:55<17:36, 27.10s/it][A

tensor(0.5930, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:23<17:13, 27.21s/it][A

tensor(0.6746, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:50<16:45, 27.17s/it][A

tensor(0.7161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:17<16:17, 27.14s/it][A

tensor(0.6218, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:44<15:49, 27.13s/it][A

tensor(0.6085, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:11<15:26, 27.25s/it][A

tensor(0.5206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:38<14:56, 27.18s/it][A

tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:06<14:28, 27.16s/it][A

tensor(0.6284, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:33<14:00, 27.13s/it][A

tensor(0.6153, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:00<13:33, 27.11s/it][A

tensor(0.5316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:27<13:09, 27.23s/it][A

tensor(0.6152, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [24:54<12:42, 27.24s/it][A

tensor(0.5817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:22<12:15, 27.26s/it][A

tensor(0.7044, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:49<11:48, 27.27s/it][A

tensor(0.6266, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:17<11:25, 27.43s/it][A

tensor(0.6878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:44<10:57, 27.40s/it][A

tensor(0.6145, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:11<10:28, 27.35s/it][A

tensor(0.6551, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:39<10:01, 27.32s/it][A

tensor(0.6292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:06<09:33, 27.30s/it][A

tensor(0.6173, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:34<09:07, 27.38s/it][A

tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [29:00<08:37, 27.23s/it][A

tensor(0.5138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:27<08:08, 27.11s/it][A

tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [29:54<07:39, 27.03s/it][A

tensor(0.6775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:22<07:14, 27.15s/it][A

tensor(0.6274, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:48<06:46, 27.07s/it][A

tensor(0.5567, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:15<06:18, 27.02s/it][A

tensor(0.6407, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:42<05:50, 26.97s/it][A

tensor(0.7310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:09<05:23, 26.95s/it][A

tensor(0.6155, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:37<04:58, 27.12s/it][A

tensor(0.6818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:04<04:30, 27.09s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:31<04:03, 27.07s/it][A

tensor(0.5432, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [33:58<03:36, 27.09s/it][A

tensor(0.4968, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:26<03:11, 27.38s/it][A

tensor(0.5641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [34:53<02:44, 27.35s/it][A

tensor(0.5504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:20<02:16, 27.31s/it][A

tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:47<01:49, 27.26s/it][A

tensor(0.6375, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:15<01:21, 27.25s/it][A

tensor(0.6984, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:42<00:54, 27.37s/it][A

tensor(0.5576, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:10<00:27, 27.32s/it][A

tensor(0.6193, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:19<00:00, 26.98s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7850, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6159125282821885

	train acc: 0.6444927437020811

	training prec: 0.8451568373942532

	training rec: 0.6444927437020811

	training f1: 0.7053906701824301

	Current Learning rate:  1e-05



  2%|▏         | 1/42 [00:03<02:21,  3.46s/it][A
  5%|▍         | 2/42 [00:06<02:18,  3.46s/it][A
  7%|▋         | 3/42 [00:10<02:15,  3.47s/it][A
 10%|▉         | 4/42 [00:13<02:11,  3.45s/it][A
 12%|█▏        | 5/42 [00:17<02:07,  3.46s/it][A
 14%|█▍        | 6/42 [00:20<02:04,  3.46s/it][A
 17%|█▋        | 7/42 [00:24<02:00,  3.44s/it][A
 19%|█▉        | 8/42 [00:27<01:57,  3.46s/it][A
 21%|██▏       | 9/42 [00:31<01:54,  3.47s/it][A
 24%|██▍       | 10/42 [00:34<01:50,  3.45s/it][A
 26%|██▌       | 11/42 [00:38<01:46,  3.45s/it][A
 29%|██▊       | 12/42 [00:41<01:43,  3.45s/it][A
 31%|███       | 13/42 [00:44<01:39,  3.44s/it][A
 33%|███▎      | 14/42 [00:48<01:36,  3.44s/it][A
 36%|███▌      | 15/42 [00:51<01:32,  3.44s/it][A
 38%|███▊      | 16/42 [00:55<01:29,  3.43s/it][A
 40%|████      | 17/42 [00:58<01:26,  3.45s/it][A
 43%|████▎     | 18/42 [01:02<01:22,  3.45s/it][A
 45%|████▌     | 19/42 [01:05<01:18,  3.43s/it][A
 48%|████▊     | 20/42 [01:09<01:18,  3


	Validation loss: 0.5866220288333439

	Validation acc: 0.6128472222222222

	Validation prec: 0.8671774934734147

	Validation rec: 0.6128472222222222

	Validation f1: 0.6805276552733064
loss: 


  1%|          | 1/83 [00:27<37:09, 27.19s/it][A

tensor(0.6694, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:54<37:04, 27.46s/it][A

tensor(0.5203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:21<36:25, 27.32s/it][A

tensor(0.6061, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:49<35:53, 27.26s/it][A

tensor(0.6907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:16<35:23, 27.23s/it][A

tensor(0.5533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:43<34:56, 27.23s/it][A

tensor(0.5685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:11<34:36, 27.32s/it][A

tensor(0.6078, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:38<34:02, 27.23s/it][A

tensor(0.6356, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:05<33:30, 27.17s/it][A

tensor(0.6703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:32<33:01, 27.14s/it][A

tensor(0.5811, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:59<32:45, 27.29s/it][A

tensor(0.5928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:26<32:12, 27.22s/it][A

tensor(0.6132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:53<31:41, 27.16s/it][A

tensor(0.5778, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:21<31:14, 27.16s/it][A

tensor(0.5484, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:48<30:45, 27.14s/it][A

tensor(0.5201, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:15<30:26, 27.26s/it][A

tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:42<29:54, 27.19s/it][A

tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:09<29:22, 27.11s/it][A

tensor(0.5615, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:36<28:52, 27.07s/it][A

tensor(0.5409, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:04<28:34, 27.21s/it][A

tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:31<28:05, 27.18s/it][A

tensor(0.6206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [09:58<27:34, 27.13s/it][A

tensor(0.6459, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:25<27:05, 27.10s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:52<26:34, 27.03s/it][A

tensor(0.7033, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:19<26:16, 27.18s/it][A

tensor(0.6750, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:46<25:47, 27.14s/it][A

tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:13<25:19, 27.13s/it][A

tensor(0.5824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:40<24:51, 27.11s/it][A

tensor(0.7849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:08<24:34, 27.31s/it][A

tensor(0.5665, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:35<24:05, 27.27s/it][A

tensor(0.5445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:03<23:37, 27.25s/it][A

tensor(0.5805, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:30<23:08, 27.23s/it][A

tensor(0.6733, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [14:57<22:40, 27.21s/it][A

tensor(0.4914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:24<22:17, 27.29s/it][A

tensor(0.5103, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:52<21:47, 27.23s/it][A

tensor(0.5806, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:19<21:16, 27.17s/it][A

tensor(0.6901, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:46<20:47, 27.12s/it][A

tensor(0.5657, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:13<20:28, 27.31s/it][A

tensor(0.5488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:40<19:58, 27.24s/it][A

tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:07<19:28, 27.18s/it][A

tensor(0.4939, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:34<18:59, 27.13s/it][A

tensor(0.6348, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:02<18:32, 27.14s/it][A

tensor(0.6168, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:29<18:10, 27.27s/it][A

tensor(0.6391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [19:56<17:40, 27.20s/it][A

tensor(0.6006, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:23<17:11, 27.15s/it][A

tensor(0.6654, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:50<16:43, 27.13s/it][A

tensor(0.5911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:18<16:21, 27.25s/it][A

tensor(0.5712, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:45<15:51, 27.18s/it][A

tensor(0.5900, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:12<15:22, 27.14s/it][A

tensor(0.6028, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:39<14:54, 27.12s/it][A

tensor(0.5399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:06<14:26, 27.07s/it][A

tensor(0.6327, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:33<14:03, 27.19s/it][A

tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:00<13:34, 27.13s/it][A

tensor(0.5772, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:28<13:06, 27.11s/it][A

tensor(0.7490, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [24:55<12:38, 27.10s/it][A

tensor(0.5461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:22<12:16, 27.27s/it][A

tensor(0.5413, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:49<11:47, 27.21s/it][A

tensor(0.4962, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:16<11:18, 27.16s/it][A

tensor(0.5951, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:43<10:50, 27.10s/it][A

tensor(0.6132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:10<10:23, 27.09s/it][A

tensor(0.5741, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:38<09:58, 27.21s/it][A

tensor(0.5608, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:05<09:30, 27.16s/it][A

tensor(0.7221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:32<09:02, 27.12s/it][A

tensor(0.7064, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [28:59<08:35, 27.11s/it][A

tensor(0.5602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:27<08:10, 27.27s/it][A

tensor(0.5466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [29:54<07:42, 27.23s/it][A

tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:21<07:14, 27.19s/it][A

tensor(0.5210, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:48<06:47, 27.17s/it][A

tensor(0.6738, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:15<06:20, 27.18s/it][A

tensor(0.5511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:43<05:55, 27.33s/it][A

tensor(0.6213, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:10<05:27, 27.31s/it][A

tensor(0.5947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:37<05:00, 27.29s/it][A

tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:05<04:32, 27.24s/it][A

tensor(0.7073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:32<04:06, 27.36s/it][A

tensor(0.5686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [33:59<03:38, 27.28s/it][A

tensor(0.5743, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:26<03:10, 27.22s/it][A

tensor(0.7313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [34:53<02:43, 27.18s/it][A

tensor(0.6138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:21<02:15, 27.15s/it][A

tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:48<01:49, 27.25s/it][A

tensor(0.5465, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:15<01:21, 27.19s/it][A

tensor(0.7897, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:42<00:54, 27.14s/it][A

tensor(0.5464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:09<00:27, 27.13s/it][A

tensor(0.5541, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:19<00:00, 26.98s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7483, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6076101772038334

	train acc: 0.6453142113910186

	training prec: 0.8516646303609609

	training rec: 0.6453142113910186

	training f1: 0.7069376756740747

	Current Learning rate:  8.571428571428573e-06



  2%|▏         | 1/42 [00:03<02:21,  3.46s/it][A
  5%|▍         | 2/42 [00:06<02:17,  3.43s/it][A
  7%|▋         | 3/42 [00:10<02:15,  3.48s/it][A
 10%|▉         | 4/42 [00:14<02:15,  3.56s/it][A
 12%|█▏        | 5/42 [00:17<02:13,  3.60s/it][A
 14%|█▍        | 6/42 [00:21<02:07,  3.55s/it][A
 17%|█▋        | 7/42 [00:24<02:03,  3.53s/it][A
 19%|█▉        | 8/42 [00:28<01:58,  3.49s/it][A
 21%|██▏       | 9/42 [00:31<01:54,  3.48s/it][A
 24%|██▍       | 10/42 [00:35<01:51,  3.47s/it][A
 26%|██▌       | 11/42 [00:38<01:47,  3.45s/it][A
 29%|██▊       | 12/42 [00:41<01:43,  3.45s/it][A
 31%|███       | 13/42 [00:45<01:40,  3.45s/it][A
 33%|███▎      | 14/42 [00:48<01:36,  3.44s/it][A
 36%|███▌      | 15/42 [00:52<01:32,  3.44s/it][A
 38%|███▊      | 16/42 [00:55<01:29,  3.45s/it][A
 40%|████      | 17/42 [00:59<01:26,  3.44s/it][A
 43%|████▎     | 18/42 [01:02<01:22,  3.44s/it][A
 45%|████▌     | 19/42 [01:05<01:19,  3.45s/it][A
 48%|████▊     | 20/42 [01:09<01:15,  3


	Validation loss: 0.5851248800754547

	Validation acc: 0.6336805555555556

	Validation prec: 0.8634364657029853

	Validation rec: 0.6336805555555556

	Validation f1: 0.6972708578973057
loss: 


  1%|          | 1/83 [00:27<37:21, 27.33s/it][A

tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:54<36:50, 27.29s/it][A

tensor(0.5039, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:21<36:17, 27.22s/it][A

tensor(0.9026, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:48<35:50, 27.22s/it][A

tensor(0.6072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:16<35:35, 27.38s/it][A

tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:43<35:03, 27.32s/it][A

tensor(0.7122, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:10<34:29, 27.23s/it][A

tensor(0.5076, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:38<34:00, 27.20s/it][A

tensor(0.5475, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:05<33:44, 27.36s/it][A

tensor(0.5345, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:32<33:13, 27.30s/it][A

tensor(0.6986, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:59<32:41, 27.24s/it][A

tensor(0.6024, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:27<32:12, 27.22s/it][A

tensor(0.6861, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:54<31:42, 27.18s/it][A

tensor(0.5583, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:21<31:23, 27.30s/it][A

tensor(0.5992, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:48<30:51, 27.22s/it][A

tensor(0.6675, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:15<30:21, 27.18s/it][A

tensor(0.5240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:43<29:52, 27.16s/it][A

tensor(0.5661, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:10<29:35, 27.32s/it][A

tensor(0.5816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:38<29:07, 27.31s/it][A

tensor(0.5880, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:05<28:36, 27.25s/it][A

tensor(0.4815, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:32<28:06, 27.20s/it][A

tensor(0.6319, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [09:59<27:37, 27.17s/it][A

tensor(0.5159, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:26<27:18, 27.31s/it][A

tensor(0.5789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:54<26:51, 27.31s/it][A

tensor(0.5756, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:21<26:23, 27.29s/it][A

tensor(0.6427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:48<25:55, 27.30s/it][A

tensor(0.5717, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:16<25:34, 27.40s/it][A

tensor(0.5403, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:43<25:01, 27.30s/it][A

tensor(0.5273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:10<24:31, 27.24s/it][A

tensor(0.5549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:37<24:00, 27.18s/it][A

tensor(0.5188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:04<23:33, 27.19s/it][A

tensor(0.6205, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:32<23:14, 27.35s/it][A

tensor(0.5458, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [15:00<22:48, 27.37s/it][A

tensor(0.6407, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:27<22:23, 27.41s/it][A

tensor(0.6137, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:54<21:54, 27.38s/it][A

tensor(0.5295, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:22<21:33, 27.52s/it][A

tensor(0.5908, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:49<21:02, 27.45s/it][A

tensor(0.5817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:17<20:33, 27.42s/it][A

tensor(0.7123, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:44<20:04, 27.38s/it][A

tensor(0.5586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:11<19:35, 27.33s/it][A

tensor(0.5343, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:39<19:16, 27.54s/it][A

tensor(0.4611, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:07<18:46, 27.47s/it][A

tensor(0.7010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:34<18:16, 27.42s/it][A

tensor(0.5718, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [20:01<17:47, 27.37s/it][A

tensor(0.5805, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:29<17:25, 27.50s/it][A

tensor(0.5471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:56<16:54, 27.43s/it][A

tensor(0.6996, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:24<16:26, 27.39s/it][A

tensor(0.6000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:51<15:56, 27.33s/it][A

tensor(0.7422, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:18<15:28, 27.31s/it][A

tensor(0.5069, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:46<15:06, 27.48s/it][A

tensor(0.5460, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:13<14:38, 27.45s/it][A

tensor(0.6532, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:41<14:09, 27.39s/it][A

tensor(0.5882, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:08<13:41, 27.38s/it][A

tensor(0.6620, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:36<13:17, 27.51s/it][A

tensor(0.5833, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [25:03<12:48, 27.43s/it][A

tensor(0.5637, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:30<12:19, 27.38s/it][A

tensor(0.5611, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:58<11:51, 27.36s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:25<11:22, 27.31s/it][A

tensor(0.5102, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:53<10:59, 27.46s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:20<10:29, 27.38s/it][A

tensor(0.7426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:47<10:00, 27.30s/it][A

tensor(0.5657, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:14<09:32, 27.25s/it][A

tensor(0.5701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:42<09:07, 27.39s/it][A

tensor(0.5318, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [29:09<08:38, 27.30s/it][A

tensor(0.7638, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:36<08:10, 27.25s/it][A

tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [30:03<07:42, 27.23s/it][A

tensor(0.6574, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:30<07:15, 27.21s/it][A

tensor(0.5412, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:58<06:50, 27.35s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:25<06:22, 27.30s/it][A

tensor(0.6834, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:52<05:54, 27.27s/it][A

tensor(0.6144, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:19<05:26, 27.22s/it][A

tensor(0.5724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:47<05:01, 27.38s/it][A

tensor(0.5430, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:14<04:32, 27.28s/it][A

tensor(0.6552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:41<04:05, 27.25s/it][A

tensor(0.6373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [34:09<03:37, 27.21s/it][A

tensor(0.5273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:36<03:10, 27.19s/it][A

tensor(0.5328, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [35:03<02:44, 27.37s/it][A

tensor(0.6130, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:31<02:16, 27.31s/it][A

tensor(0.5987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:58<01:49, 27.27s/it][A

tensor(0.5529, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:25<01:21, 27.22s/it][A

tensor(0.7099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:53<00:54, 27.35s/it][A

tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:20<00:27, 27.28s/it][A

tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:29<00:00, 27.10s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5628, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6006563295082874

	train acc: 0.664841182913472

	training prec: 0.8563812497575852

	training rec: 0.664841182913472

	training f1: 0.722987563472885

	Current Learning rate:  7.142857142857143e-06



  2%|▏         | 1/42 [00:03<02:23,  3.50s/it][A
  5%|▍         | 2/42 [00:06<02:19,  3.48s/it][A
  7%|▋         | 3/42 [00:10<02:14,  3.44s/it][A
 10%|▉         | 4/42 [00:13<02:10,  3.45s/it][A
 12%|█▏        | 5/42 [00:17<02:07,  3.46s/it][A
 14%|█▍        | 6/42 [00:20<02:03,  3.44s/it][A
 17%|█▋        | 7/42 [00:24<02:00,  3.44s/it][A
 19%|█▉        | 8/42 [00:27<01:57,  3.45s/it][A
 21%|██▏       | 9/42 [00:31<01:53,  3.44s/it][A
 24%|██▍       | 10/42 [00:34<01:50,  3.44s/it][A
 26%|██▌       | 11/42 [00:37<01:46,  3.45s/it][A
 29%|██▊       | 12/42 [00:41<01:43,  3.44s/it][A
 31%|███       | 13/42 [00:44<01:39,  3.44s/it][A
 33%|███▎      | 14/42 [00:48<01:36,  3.45s/it][A
 36%|███▌      | 15/42 [00:51<01:32,  3.44s/it][A
 38%|███▊      | 16/42 [00:55<01:29,  3.44s/it][A
 40%|████      | 17/42 [00:58<01:26,  3.48s/it][A
 43%|████▎     | 18/42 [01:02<01:23,  3.47s/it][A
 45%|████▌     | 19/42 [01:05<01:19,  3.47s/it][A
 48%|████▊     | 20/42 [01:09<01:16,  3


	Validation loss: 0.5839463294971556

	Validation acc: 0.6168154761904762

	Validation prec: 0.8615610095962227

	Validation rec: 0.6168154761904762

	Validation f1: 0.682561825416448
loss: 


  1%|          | 1/83 [00:27<37:06, 27.15s/it][A

tensor(0.4740, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:54<36:47, 27.26s/it][A

tensor(0.5711, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:21<36:20, 27.26s/it][A

tensor(0.5966, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:48<35:51, 27.24s/it][A

tensor(0.5721, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:16<35:25, 27.25s/it][A

tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:43<34:54, 27.20s/it][A

tensor(0.6326, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:11<34:42, 27.40s/it][A

tensor(0.6685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:38<34:08, 27.32s/it][A

tensor(0.6180, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:05<33:36, 27.25s/it][A

tensor(0.6021, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:32<33:06, 27.22s/it][A

tensor(0.5548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:59<32:44, 27.28s/it][A

tensor(0.6046, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:27<32:16, 27.28s/it][A

tensor(0.6384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:54<31:47, 27.26s/it][A

tensor(0.5604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:21<31:17, 27.21s/it][A

tensor(0.6705, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:48<30:50, 27.21s/it][A

tensor(0.6236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:16<30:32, 27.35s/it][A

tensor(0.5650, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:43<30:00, 27.29s/it][A

tensor(0.6490, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:10<29:29, 27.22s/it][A

tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:37<28:59, 27.18s/it][A

tensor(0.7153, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:05<28:36, 27.25s/it][A

tensor(0.5419, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:32<28:11, 27.29s/it][A

tensor(0.6887, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [09:59<27:39, 27.20s/it][A

tensor(0.7037, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:26<27:09, 27.16s/it][A

tensor(0.7562, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:53<26:40, 27.13s/it][A

tensor(0.7213, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:21<26:19, 27.23s/it][A

tensor(0.6537, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:48<25:48, 27.17s/it][A

tensor(0.5732, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:15<25:19, 27.14s/it][A

tensor(0.6903, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:42<24:52, 27.13s/it][A

tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:09<24:23, 27.10s/it][A

tensor(0.5329, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:36<24:05, 27.27s/it][A

tensor(0.5617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:04<23:35, 27.22s/it][A

tensor(0.6908, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:31<23:06, 27.18s/it][A

tensor(0.6399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [14:58<22:37, 27.15s/it][A

tensor(0.6432, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:25<22:17, 27.30s/it][A

tensor(0.6435, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:53<21:52, 27.34s/it][A

tensor(0.6461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:20<21:27, 27.40s/it][A

tensor(0.6292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:48<21:00, 27.41s/it][A

tensor(0.5346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:16<20:37, 27.50s/it][A

tensor(0.4768, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:43<20:12, 27.56s/it][A

tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:11<19:43, 27.52s/it][A

tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:38<19:13, 27.46s/it][A

tensor(0.5771, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:05<18:45, 27.46s/it][A

tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:33<18:23, 27.58s/it][A

tensor(0.5834, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [20:01<17:51, 27.48s/it][A

tensor(0.5720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:28<17:21, 27.41s/it][A

tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:55<16:51, 27.34s/it][A

tensor(0.4648, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:23<16:28, 27.47s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:50<15:59, 27.42s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:17<15:30, 27.37s/it][A

tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:45<15:02, 27.34s/it][A

tensor(0.5965, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:12<14:33, 27.31s/it][A

tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:40<14:11, 27.46s/it][A

tensor(0.5142, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:07<13:41, 27.38s/it][A

tensor(0.6657, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:34<13:12, 27.34s/it][A

tensor(0.5293, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [25:01<12:44, 27.30s/it][A

tensor(0.5779, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:29<12:21, 27.47s/it][A

tensor(0.6038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:56<11:50, 27.33s/it][A

tensor(0.7155, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:23<11:20, 27.24s/it][A

tensor(0.5979, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:50<10:52, 27.18s/it][A

tensor(0.5264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:17<10:24, 27.15s/it][A

tensor(0.6257, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:45<10:00, 27.27s/it][A

tensor(0.5615, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:12<09:31, 27.21s/it][A

tensor(0.6690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:39<09:03, 27.15s/it][A

tensor(0.5817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [29:06<08:35, 27.12s/it][A

tensor(0.6149, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:33<08:10, 27.24s/it][A

tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [30:01<07:42, 27.18s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:28<07:14, 27.15s/it][A

tensor(0.6032, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:55<06:46, 27.12s/it][A

tensor(0.6518, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:22<06:19, 27.12s/it][A

tensor(0.5818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:49<05:54, 27.28s/it][A

tensor(0.6455, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:16<05:26, 27.22s/it][A

tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:44<05:00, 27.30s/it][A

tensor(0.5588, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:11<04:33, 27.35s/it][A

tensor(0.5581, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:39<04:07, 27.55s/it][A

tensor(0.5462, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [34:07<03:40, 27.53s/it][A

tensor(0.6038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:34<03:12, 27.52s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [35:02<02:44, 27.49s/it][A

tensor(0.5945, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:29<02:17, 27.47s/it][A

tensor(0.5708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:57<01:50, 27.60s/it][A

tensor(0.5300, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:24<01:22, 27.51s/it][A

tensor(0.5001, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:52<00:54, 27.45s/it][A

tensor(0.4982, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:19<00:27, 27.43s/it][A

tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:29<00:00, 27.10s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7756, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6081035650638212

	train acc: 0.6519030668127054

	training prec: 0.8525712018379235

	training rec: 0.6519030668127054

	training f1: 0.7115715461742761

	Current Learning rate:  5.7142857142857145e-06



  2%|▏         | 1/42 [00:03<02:19,  3.41s/it][A
  5%|▍         | 2/42 [00:06<02:18,  3.45s/it][A
  7%|▋         | 3/42 [00:10<02:15,  3.47s/it][A
 10%|▉         | 4/42 [00:13<02:12,  3.49s/it][A
 12%|█▏        | 5/42 [00:17<02:13,  3.61s/it][A
 14%|█▍        | 6/42 [00:21<02:08,  3.56s/it][A
 17%|█▋        | 7/42 [00:24<02:02,  3.51s/it][A
 19%|█▉        | 8/42 [00:28<01:58,  3.50s/it][A
 21%|██▏       | 9/42 [00:31<01:55,  3.49s/it][A
 24%|██▍       | 10/42 [00:34<01:50,  3.47s/it][A
 26%|██▌       | 11/42 [00:38<01:47,  3.46s/it][A
 29%|██▊       | 12/42 [00:41<01:43,  3.47s/it][A
 31%|███       | 13/42 [00:45<01:40,  3.45s/it][A
 33%|███▎      | 14/42 [00:48<01:36,  3.46s/it][A
 36%|███▌      | 15/42 [00:52<01:33,  3.46s/it][A
 38%|███▊      | 16/42 [00:55<01:29,  3.45s/it][A
 40%|████      | 17/42 [00:59<01:26,  3.45s/it][A
 43%|████▎     | 18/42 [01:02<01:22,  3.45s/it][A
 45%|████▌     | 19/42 [01:05<01:18,  3.43s/it][A
 48%|████▊     | 20/42 [01:09<01:15,  3


	Validation loss: 0.5796221551441011

	Validation acc: 0.6346726190476191

	Validation prec: 0.8634064599007516

	Validation rec: 0.6346726190476191

	Validation f1: 0.6970293132865271
loss: 


  1%|          | 1/83 [00:27<37:42, 27.60s/it][A

tensor(0.5915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:55<37:11, 27.55s/it][A

tensor(0.6942, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:22<36:44, 27.56s/it][A

tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:50<36:16, 27.55s/it][A

tensor(0.5178, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:18<36:01, 27.72s/it][A

tensor(0.6151, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:45<35:27, 27.63s/it][A

tensor(0.5534, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:13<34:57, 27.60s/it][A

tensor(0.7706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:40<34:26, 27.56s/it][A

tensor(0.6325, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:08<34:07, 27.67s/it][A

tensor(0.5399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:35<33:28, 27.52s/it][A

tensor(0.5213, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [05:03<32:54, 27.43s/it][A

tensor(0.7075, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:30<32:21, 27.35s/it][A

tensor(0.4947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:57<31:50, 27.29s/it][A

tensor(0.5976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:25<31:31, 27.41s/it][A

tensor(0.4760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:52<31:00, 27.36s/it][A

tensor(0.5443, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:19<30:28, 27.29s/it][A

tensor(0.5656, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:46<29:58, 27.25s/it][A

tensor(0.5182, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:14<29:43, 27.43s/it][A

tensor(0.4859, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:41<29:12, 27.38s/it][A

tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:08<28:42, 27.34s/it][A

tensor(0.5704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:36<28:12, 27.30s/it][A

tensor(0.4788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [10:03<27:43, 27.27s/it][A

tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:31<27:23, 27.40s/it][A

tensor(0.7452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:58<26:51, 27.31s/it][A

tensor(0.6761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:25<26:21, 27.26s/it][A

tensor(0.5959, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:52<25:51, 27.22s/it][A

tensor(0.5927, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:20<25:32, 27.37s/it][A

tensor(0.5052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:47<25:03, 27.33s/it][A

tensor(0.6363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:14<24:33, 27.28s/it][A

tensor(0.5073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:41<24:04, 27.26s/it][A

tensor(0.5968, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:08<23:35, 27.22s/it][A

tensor(0.6068, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:36<23:14, 27.34s/it][A

tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [15:03<22:43, 27.27s/it][A

tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:30<22:13, 27.22s/it][A

tensor(0.5885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:57<21:46, 27.21s/it][A

tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:25<21:27, 27.40s/it][A

tensor(0.6987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:53<20:58, 27.37s/it][A

tensor(0.6936, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:20<20:30, 27.34s/it][A

tensor(0.5589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:47<20:02, 27.32s/it][A

tensor(0.4877, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:14<19:34, 27.31s/it][A

tensor(0.6043, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:42<19:11, 27.43s/it][A

tensor(0.5910, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:09<18:41, 27.36s/it][A

tensor(0.6786, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:36<18:12, 27.32s/it][A

tensor(0.6906, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [20:04<17:42, 27.25s/it][A

tensor(0.5737, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:31<17:21, 27.40s/it][A

tensor(0.5827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:59<16:51, 27.34s/it][A

tensor(0.7625, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:26<16:22, 27.28s/it][A

tensor(0.7755, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:53<15:53, 27.23s/it][A

tensor(0.5445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:20<15:24, 27.19s/it][A

tensor(0.5308, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:48<15:02, 27.34s/it][A

tensor(0.5646, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:15<14:33, 27.29s/it][A

tensor(0.6312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:42<14:04, 27.24s/it][A

tensor(0.6152, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:09<13:36, 27.23s/it][A

tensor(0.5459, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:37<13:15, 27.43s/it][A

tensor(0.6081, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [25:04<12:46, 27.39s/it][A

tensor(0.6084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:32<12:18, 27.36s/it][A

tensor(0.7404, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:59<11:50, 27.33s/it][A

tensor(0.6120, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:26<11:22, 27.30s/it][A

tensor(0.7178, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:54<10:58, 27.44s/it][A

tensor(0.7498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:21<10:30, 27.41s/it][A

tensor(0.6239, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:48<10:02, 27.38s/it][A

tensor(0.6188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:16<09:34, 27.36s/it][A

tensor(0.5691, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:44<09:10, 27.50s/it][A

tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [29:11<08:41, 27.45s/it][A

tensor(0.6656, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:38<08:12, 27.39s/it][A

tensor(0.5540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [30:05<07:44, 27.35s/it][A

tensor(0.6098, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:33<07:17, 27.31s/it][A

tensor(0.4764, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [31:00<06:51, 27.45s/it][A

tensor(0.5196, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:28<06:23, 27.38s/it][A

tensor(0.6063, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:55<05:55, 27.32s/it][A

tensor(0.5621, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:22<05:27, 27.28s/it][A

tensor(0.5897, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:50<05:02, 27.48s/it][A

tensor(0.5987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:17<04:34, 27.41s/it][A

tensor(0.6135, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:44<04:06, 27.37s/it][A

tensor(0.4789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [34:12<03:38, 27.32s/it][A

tensor(0.5708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:39<03:10, 27.28s/it][A

tensor(0.6359, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [35:07<02:44, 27.50s/it][A

tensor(0.5512, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:34<02:17, 27.49s/it][A

tensor(0.5288, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [36:02<01:49, 27.48s/it][A

tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:29<01:22, 27.42s/it][A

tensor(0.6054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:57<00:55, 27.54s/it][A

tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:24<00:27, 27.46s/it][A

tensor(0.6657, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:34<00:00, 27.16s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5904, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6027821844600769

	train acc: 0.6693592552026286

	training prec: 0.8552082798028442

	training rec: 0.6693592552026286

	training f1: 0.7275447253000249

	Current Learning rate:  4.285714285714286e-06



  2%|▏         | 1/42 [00:03<02:24,  3.51s/it][A
  5%|▍         | 2/42 [00:06<02:18,  3.45s/it][A
  7%|▋         | 3/42 [00:10<02:14,  3.45s/it][A
 10%|▉         | 4/42 [00:13<02:11,  3.46s/it][A
 12%|█▏        | 5/42 [00:17<02:07,  3.44s/it][A
 14%|█▍        | 6/42 [00:20<02:04,  3.45s/it][A
 17%|█▋        | 7/42 [00:24<02:00,  3.46s/it][A
 19%|█▉        | 8/42 [00:27<01:57,  3.45s/it][A
 21%|██▏       | 9/42 [00:31<01:53,  3.45s/it][A
 24%|██▍       | 10/42 [00:34<01:51,  3.47s/it][A
 26%|██▌       | 11/42 [00:38<01:47,  3.46s/it][A
 29%|██▊       | 12/42 [00:41<01:43,  3.47s/it][A
 31%|███       | 13/42 [00:45<01:40,  3.48s/it][A
 33%|███▎      | 14/42 [00:48<01:36,  3.46s/it][A
 36%|███▌      | 15/42 [00:51<01:33,  3.47s/it][A
 38%|███▊      | 16/42 [00:55<01:30,  3.48s/it][A
 40%|████      | 17/42 [00:58<01:26,  3.47s/it][A
 43%|████▎     | 18/42 [01:02<01:23,  3.47s/it][A
 45%|████▌     | 19/42 [01:05<01:20,  3.48s/it][A
 48%|████▊     | 20/42 [01:09<01:16,  3


	Validation loss: 0.586679145693779

	Validation acc: 0.6364087301587302

	Validation prec: 0.8611809350029531

	Validation rec: 0.6364087301587302

	Validation f1: 0.7001061216715057
loss: 


  1%|          | 1/83 [00:27<37:39, 27.55s/it][A

tensor(0.5785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:55<37:35, 27.84s/it][A

tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:22<36:49, 27.61s/it][A

tensor(0.7042, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:50<36:13, 27.52s/it][A

tensor(0.5333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:17<35:41, 27.46s/it][A

tensor(0.5232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:45<35:15, 27.47s/it][A

tensor(0.5925, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:13<34:57, 27.60s/it][A

tensor(0.7193, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:40<34:26, 27.56s/it][A

tensor(0.5846, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:07<33:55, 27.50s/it][A

tensor(0.7189, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:35<33:24, 27.46s/it][A

tensor(0.6127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [05:03<33:06, 27.59s/it][A

tensor(0.6133, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:30<32:33, 27.52s/it][A

tensor(0.5730, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:57<32:02, 27.47s/it][A

tensor(0.5329, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:25<31:33, 27.44s/it][A

tensor(0.5596, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:52<31:03, 27.40s/it][A

tensor(0.5668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:20<30:45, 27.55s/it][A

tensor(0.4956, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:47<30:14, 27.49s/it][A

tensor(0.6068, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:15<29:43, 27.45s/it][A

tensor(0.5843, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:42<29:13, 27.39s/it][A

tensor(0.5981, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:10<28:54, 27.54s/it][A

tensor(0.4785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:37<28:25, 27.51s/it][A

tensor(0.5918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [10:04<27:54, 27.45s/it][A

tensor(0.6108, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:32<27:24, 27.41s/it][A

tensor(0.5401, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:59<26:56, 27.40s/it][A

tensor(0.5990, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:27<26:38, 27.56s/it][A

tensor(0.5780, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:54<26:06, 27.48s/it][A

tensor(0.5326, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:22<25:35, 27.43s/it][A

tensor(0.6291, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:49<25:11, 27.47s/it][A

tensor(0.5795, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:17<24:50, 27.60s/it][A

tensor(0.6128, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:45<24:19, 27.54s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:12<23:48, 27.48s/it][A

tensor(0.6801, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:39<23:18, 27.43s/it][A

tensor(0.5911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [15:07<22:50, 27.41s/it][A

tensor(0.5275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:34<22:28, 27.51s/it][A

tensor(0.6375, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [16:02<21:57, 27.45s/it][A

tensor(0.5345, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:29<21:28, 27.42s/it][A

tensor(0.6127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:56<21:00, 27.40s/it][A

tensor(0.5491, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:24<20:40, 27.56s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:52<20:10, 27.51s/it][A

tensor(0.5083, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:19<19:40, 27.44s/it][A

tensor(0.5672, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:46<19:12, 27.44s/it][A

tensor(0.6772, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:14<18:43, 27.41s/it][A

tensor(0.4626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:42<18:21, 27.55s/it][A

tensor(0.5781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [20:09<17:50, 27.44s/it][A

tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:36<17:19, 27.35s/it][A

tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [21:03<16:49, 27.28s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:31<16:26, 27.41s/it][A

tensor(0.7026, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:58<15:56, 27.32s/it][A

tensor(0.4962, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:25<15:27, 27.28s/it][A

tensor(0.5895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:52<14:59, 27.24s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:19<14:28, 27.14s/it][A

tensor(0.5009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:47<14:06, 27.30s/it][A

tensor(0.5911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:14<13:37, 27.24s/it][A

tensor(0.4849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:41<13:08, 27.19s/it][A

tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [25:08<12:40, 27.16s/it][A

tensor(0.6442, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:36<12:18, 27.36s/it][A

tensor(0.4905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [26:03<11:49, 27.29s/it][A

tensor(0.6129, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:30<11:21, 27.26s/it][A

tensor(0.5541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:57<10:54, 27.26s/it][A

tensor(0.5971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:25<10:25, 27.21s/it][A

tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:52<10:02, 27.37s/it][A

tensor(0.5981, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:19<09:33, 27.31s/it][A

tensor(0.5237, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:47<09:05, 27.27s/it][A

tensor(0.5905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [29:14<08:37, 27.24s/it][A

tensor(0.5605, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:42<08:13, 27.40s/it][A

tensor(0.7827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [30:09<07:44, 27.33s/it][A

tensor(0.5558, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:36<07:16, 27.27s/it][A

tensor(0.6188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [31:03<06:48, 27.25s/it][A

tensor(0.6413, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:30<06:20, 27.21s/it][A

tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:58<05:55, 27.34s/it][A

tensor(0.7638, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:25<05:27, 27.29s/it][A

tensor(0.5426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:52<05:00, 27.31s/it][A

tensor(0.5640, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:20<04:32, 27.26s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:47<04:06, 27.38s/it][A

tensor(0.5666, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [34:14<03:38, 27.33s/it][A

tensor(0.6320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:42<03:10, 27.28s/it][A

tensor(0.6373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [35:09<02:43, 27.22s/it][A

tensor(0.5919, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:36<02:16, 27.23s/it][A

tensor(0.5463, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [36:04<01:49, 27.38s/it][A

tensor(0.5571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:31<01:21, 27.29s/it][A

tensor(0.7587, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:58<00:54, 27.21s/it][A

tensor(0.4997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:25<00:27, 27.17s/it][A

tensor(0.5976, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:34<00:00, 27.16s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5051, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5938304795558194

	train acc: 0.6744249726177437

	training prec: 0.8545938406762882

	training rec: 0.6744249726177437

	training f1: 0.7298465537414898

	Current Learning rate:  2.8571428571428573e-06



  2%|▏         | 1/42 [00:03<02:23,  3.49s/it][A
  5%|▍         | 2/42 [00:06<02:19,  3.48s/it][A
  7%|▋         | 3/42 [00:10<02:24,  3.69s/it][A
 10%|▉         | 4/42 [00:14<02:16,  3.60s/it][A
 12%|█▏        | 5/42 [00:17<02:11,  3.55s/it][A
 14%|█▍        | 6/42 [00:21<02:06,  3.50s/it][A
 17%|█▋        | 7/42 [00:24<02:02,  3.49s/it][A
 19%|█▉        | 8/42 [00:28<01:58,  3.48s/it][A
 21%|██▏       | 9/42 [00:31<01:54,  3.46s/it][A
 24%|██▍       | 10/42 [00:35<01:50,  3.46s/it][A
 26%|██▌       | 11/42 [00:38<01:47,  3.45s/it][A
 29%|██▊       | 12/42 [00:41<01:43,  3.45s/it][A
 31%|███       | 13/42 [00:45<01:39,  3.45s/it][A
 33%|███▎      | 14/42 [00:48<01:36,  3.45s/it][A
 36%|███▌      | 15/42 [00:52<01:32,  3.44s/it][A
 38%|███▊      | 16/42 [00:55<01:29,  3.45s/it][A
 40%|████      | 17/42 [00:59<01:26,  3.46s/it][A
 43%|████▎     | 18/42 [01:02<01:22,  3.46s/it][A
 45%|████▌     | 19/42 [01:06<01:19,  3.46s/it][A
 48%|████▊     | 20/42 [01:09<01:16,  3


	Validation loss: 0.5838254172177542

	Validation acc: 0.6304563492063492

	Validation prec: 0.8646406205273228

	Validation rec: 0.6304563492063492

	Validation f1: 0.6955875723345532
loss: 


  1%|          | 1/83 [00:27<37:21, 27.33s/it][A

tensor(0.5967, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:54<36:54, 27.33s/it][A

tensor(0.5784, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:22<36:29, 27.37s/it][A

tensor(0.6752, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:49<36:00, 27.35s/it][A

tensor(0.5497, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:17<35:48, 27.54s/it][A

tensor(0.6475, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:44<35:12, 27.44s/it][A

tensor(0.5362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:11<34:40, 27.37s/it][A

tensor(0.6276, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:38<34:07, 27.30s/it][A

tensor(0.6905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:06<33:51, 27.46s/it][A

tensor(0.7052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:33<33:18, 27.37s/it][A

tensor(0.6079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [05:01<32:47, 27.32s/it][A

tensor(0.5841, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:28<32:17, 27.28s/it][A

tensor(0.6683, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:55<31:49, 27.28s/it][A

tensor(0.5490, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:23<31:28, 27.37s/it][A

tensor(0.5580, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:50<30:57, 27.31s/it][A

tensor(0.5676, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:17<30:25, 27.25s/it][A

tensor(0.5482, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:44<29:55, 27.20s/it][A

tensor(0.5958, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:12<29:39, 27.37s/it][A

tensor(0.5824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:39<29:07, 27.30s/it][A

tensor(0.6057, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:06<28:40, 27.31s/it][A

tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:34<28:13, 27.31s/it][A

tensor(0.6427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [10:01<27:43, 27.27s/it][A

tensor(0.6348, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:28<27:24, 27.40s/it][A

tensor(0.5506, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:56<26:52, 27.32s/it][A

tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:23<26:21, 27.27s/it][A

tensor(0.6842, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:50<25:51, 27.21s/it][A

tensor(0.6793, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:17<25:31, 27.36s/it][A

tensor(0.5975, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:45<25:02, 27.33s/it][A

tensor(0.5413, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:12<24:32, 27.27s/it][A

tensor(0.6552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:39<24:02, 27.22s/it][A

tensor(0.5282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:06<23:33, 27.19s/it][A

tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:34<23:15, 27.36s/it][A

tensor(0.5429, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [15:01<22:43, 27.27s/it][A

tensor(0.5290, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:28<22:15, 27.24s/it][A

tensor(0.6132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:55<21:45, 27.21s/it][A

tensor(0.5695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:23<21:27, 27.40s/it][A

tensor(0.5362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:50<20:55, 27.30s/it][A

tensor(0.5580, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:17<20:26, 27.25s/it][A

tensor(0.5390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:44<19:56, 27.19s/it][A

tensor(0.6794, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:11<19:28, 27.18s/it][A

tensor(0.5925, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:39<19:08, 27.34s/it][A

tensor(0.5564, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:06<18:38, 27.29s/it][A

tensor(0.5395, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:33<18:09, 27.23s/it][A

tensor(0.5550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [20:01<17:42, 27.24s/it][A

tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:29<17:24, 27.48s/it][A

tensor(0.7134, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:56<16:55, 27.43s/it][A

tensor(0.5749, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:23<16:25, 27.37s/it][A

tensor(0.6991, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:51<15:56, 27.33s/it][A

tensor(0.6198, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:18<15:27, 27.29s/it][A

tensor(0.5883, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:46<15:06, 27.47s/it][A

tensor(0.7380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:13<14:37, 27.43s/it][A

tensor(0.6106, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:40<14:08, 27.38s/it][A

tensor(0.6410, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:08<13:40, 27.36s/it][A

tensor(0.6833, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:35<13:16, 27.46s/it][A

tensor(0.6909, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [25:02<12:46, 27.39s/it][A

tensor(0.4923, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:30<12:17, 27.32s/it][A

tensor(0.6241, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [25:57<11:49, 27.28s/it][A

tensor(0.5038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:24<11:20, 27.22s/it][A

tensor(0.5782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:52<10:57, 27.41s/it][A

tensor(0.4801, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:19<10:29, 27.36s/it][A

tensor(0.5856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:46<10:00, 27.29s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:13<09:32, 27.27s/it][A

tensor(0.5385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:41<09:08, 27.41s/it][A

tensor(0.5441, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [29:08<08:39, 27.35s/it][A

tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:35<08:11, 27.28s/it][A

tensor(0.6104, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [30:03<07:43, 27.26s/it][A

tensor(0.6398, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:30<07:15, 27.25s/it][A

tensor(0.5947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [30:58<06:51, 27.40s/it][A

tensor(0.5506, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:25<06:22, 27.33s/it][A

tensor(0.5716, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:52<05:54, 27.25s/it][A

tensor(0.5483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:19<05:26, 27.23s/it][A

tensor(0.5682, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:47<05:01, 27.38s/it][A

tensor(0.5363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:14<04:33, 27.33s/it][A

tensor(0.6002, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:41<04:05, 27.30s/it][A

tensor(0.5719, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [34:08<03:38, 27.28s/it][A

tensor(0.5334, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:36<03:10, 27.24s/it][A

tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [35:03<02:44, 27.41s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:31<02:16, 27.34s/it][A

tensor(0.5733, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [35:58<01:49, 27.29s/it][A

tensor(0.6401, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:25<01:21, 27.24s/it][A

tensor(0.6618, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:53<00:54, 27.40s/it][A

tensor(0.6818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:20<00:27, 27.33s/it][A

tensor(0.6193, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:29<00:00, 27.10s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5926, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6017625827387155

	train acc: 0.6542990142387732

	training prec: 0.8481659392092489

	training rec: 0.6542990142387732

	training f1: 0.7131351285308488

	Current Learning rate:  1.4285714285714286e-06



  2%|▏         | 1/42 [00:03<02:21,  3.44s/it][A
  5%|▍         | 2/42 [00:06<02:18,  3.46s/it][A
  7%|▋         | 3/42 [00:10<02:14,  3.46s/it][A
 10%|▉         | 4/42 [00:13<02:10,  3.43s/it][A
 12%|█▏        | 5/42 [00:17<02:07,  3.44s/it][A
 14%|█▍        | 6/42 [00:20<02:03,  3.44s/it][A
 17%|█▋        | 7/42 [00:24<02:00,  3.44s/it][A
 19%|█▉        | 8/42 [00:27<01:56,  3.44s/it][A
 21%|██▏       | 9/42 [00:31<01:53,  3.45s/it][A
 24%|██▍       | 10/42 [00:34<01:50,  3.45s/it][A
 26%|██▌       | 11/42 [00:37<01:47,  3.45s/it][A
 29%|██▊       | 12/42 [00:41<01:43,  3.46s/it][A
 31%|███       | 13/42 [00:44<01:39,  3.45s/it][A
 33%|███▎      | 14/42 [00:48<01:36,  3.45s/it][A
 36%|███▌      | 15/42 [00:51<01:33,  3.45s/it][A
 38%|███▊      | 16/42 [00:55<01:29,  3.46s/it][A
 40%|████      | 17/42 [00:58<01:26,  3.47s/it][A
 43%|████▎     | 18/42 [01:02<01:23,  3.47s/it][A
 45%|████▌     | 19/42 [01:05<01:19,  3.45s/it][A
 48%|████▊     | 20/42 [01:09<01:16,  3


	Validation loss: 0.581547234029997

	Validation acc: 0.6215277777777778

	Validation prec: 0.8636390009763414

	Validation rec: 0.6215277777777778

	Validation f1: 0.6890346246587774
loss: 


  1%|          | 1/83 [00:27<37:22, 27.34s/it][A

tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:55<37:11, 27.54s/it][A

tensor(0.6241, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:22<36:41, 27.52s/it][A

tensor(0.5964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:49<36:08, 27.44s/it][A

tensor(0.4868, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [02:17<35:36, 27.39s/it][A

tensor(0.5665, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:44<35:07, 27.37s/it][A

tensor(0.6384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [03:12<34:53, 27.55s/it][A

tensor(0.5234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [03:39<34:22, 27.50s/it][A

tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [04:07<33:52, 27.47s/it][A

tensor(0.5452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [04:34<33:22, 27.44s/it][A

tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [05:02<33:08, 27.61s/it][A

tensor(0.6026, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [05:29<32:27, 27.42s/it][A

tensor(0.5831, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [05:56<31:51, 27.31s/it][A

tensor(0.5168, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [06:23<31:19, 27.24s/it][A

tensor(0.6032, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [06:50<30:49, 27.20s/it][A

tensor(0.6058, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [07:18<30:29, 27.30s/it][A

tensor(0.5802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [07:45<29:56, 27.22s/it][A

tensor(0.6778, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [08:12<29:24, 27.15s/it][A

tensor(0.5699, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [08:39<28:56, 27.13s/it][A

tensor(0.6554, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [09:06<28:32, 27.18s/it][A

tensor(0.5920, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [09:34<28:07, 27.22s/it][A

tensor(0.5843, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [10:01<27:36, 27.15s/it][A

tensor(0.7047, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [10:28<27:06, 27.11s/it][A

tensor(0.6174, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [10:54<26:36, 27.05s/it][A

tensor(0.5530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [11:22<26:21, 27.26s/it][A

tensor(0.5286, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [11:50<25:56, 27.31s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [12:17<25:30, 27.32s/it][A

tensor(0.4906, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [12:44<25:03, 27.34s/it][A

tensor(0.5913, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [13:12<24:39, 27.40s/it][A

tensor(0.5555, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [13:40<24:19, 27.54s/it][A

tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [14:07<23:49, 27.50s/it][A

tensor(0.5329, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [14:35<23:20, 27.45s/it][A

tensor(0.6130, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [15:02<22:52, 27.45s/it][A

tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [15:30<22:32, 27.60s/it][A

tensor(0.5746, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [15:57<22:01, 27.54s/it][A

tensor(0.4781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [16:25<21:31, 27.47s/it][A

tensor(0.7093, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [16:52<21:02, 27.44s/it][A

tensor(0.5713, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [17:20<20:40, 27.57s/it][A

tensor(0.4769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [17:47<20:11, 27.53s/it][A

tensor(0.7802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [18:15<19:42, 27.49s/it][A

tensor(0.6581, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [18:42<19:13, 27.46s/it][A

tensor(0.5817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [19:09<18:44, 27.43s/it][A

tensor(0.6217, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [19:37<18:22, 27.56s/it][A

tensor(0.6045, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [20:05<17:52, 27.50s/it][A

tensor(0.6423, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [20:32<17:22, 27.43s/it][A

tensor(0.6925, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [20:59<16:53, 27.38s/it][A

tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [21:27<16:30, 27.51s/it][A

tensor(0.4995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [21:54<16:00, 27.44s/it][A

tensor(0.6304, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [22:22<15:31, 27.39s/it][A

tensor(0.5815, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [22:49<15:03, 27.37s/it][A

tensor(0.5724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [23:16<14:34, 27.34s/it][A

tensor(0.5993, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [23:44<14:11, 27.47s/it][A

tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [24:11<13:41, 27.37s/it][A

tensor(0.5482, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [24:38<13:12, 27.32s/it][A

tensor(0.5584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [25:05<12:43, 27.26s/it][A

tensor(0.8260, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [25:33<12:20, 27.43s/it][A

tensor(0.6814, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [26:00<11:51, 27.35s/it][A

tensor(0.5795, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [26:28<11:22, 27.31s/it][A

tensor(0.6145, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [26:55<10:53, 27.24s/it][A

tensor(0.5421, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [27:22<10:26, 27.23s/it][A

tensor(0.4954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [27:50<10:02, 27.41s/it][A

tensor(0.6016, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [28:17<09:34, 27.35s/it][A

tensor(0.5960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [28:44<09:05, 27.29s/it][A

tensor(0.5051, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [29:11<08:38, 27.28s/it][A

tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [29:39<08:13, 27.43s/it][A

tensor(0.6583, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [30:06<07:45, 27.37s/it][A

tensor(0.5430, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [30:34<07:16, 27.31s/it][A

tensor(0.5937, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [31:01<06:48, 27.26s/it][A

tensor(0.5413, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [31:28<06:21, 27.23s/it][A

tensor(0.6498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [31:56<05:55, 27.38s/it][A

tensor(0.5730, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [32:23<05:27, 27.32s/it][A

tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [32:50<05:00, 27.29s/it][A

tensor(0.5177, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [33:17<04:33, 27.31s/it][A

tensor(0.6506, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [33:45<04:07, 27.53s/it][A

tensor(0.5619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [34:13<03:40, 27.51s/it][A

tensor(0.6868, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [34:40<03:12, 27.49s/it][A

tensor(0.5387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [35:08<02:44, 27.49s/it][A

tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [35:35<02:17, 27.48s/it][A

tensor(0.5299, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [36:03<01:50, 27.67s/it][A

tensor(0.7080, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [36:31<01:22, 27.60s/it][A

tensor(0.5039, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [36:58<00:55, 27.55s/it][A

tensor(0.5962, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [37:26<00:27, 27.53s/it][A

tensor(0.6761, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [37:35<00:00, 27.18s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.8894, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6007264522184809

	train acc: 0.6543503559693319

	training prec: 0.8523426254377233

	training rec: 0.6543503559693319

	training f1: 0.7146045344378537

	Current Learning rate:  0.0



  2%|▏         | 1/42 [00:03<02:24,  3.53s/it][A
  5%|▍         | 2/42 [00:06<02:19,  3.48s/it][A
  7%|▋         | 3/42 [00:10<02:16,  3.49s/it][A
 10%|▉         | 4/42 [00:14<02:17,  3.62s/it][A
 12%|█▏        | 5/42 [00:18<02:16,  3.69s/it][A
 14%|█▍        | 6/42 [00:21<02:10,  3.63s/it][A
 17%|█▋        | 7/42 [00:25<02:05,  3.60s/it][A
 19%|█▉        | 8/42 [00:28<02:00,  3.56s/it][A
 21%|██▏       | 9/42 [00:32<01:57,  3.55s/it][A
 24%|██▍       | 10/42 [00:35<01:53,  3.54s/it][A
 26%|██▌       | 11/42 [00:39<01:48,  3.51s/it][A
 29%|██▊       | 12/42 [00:42<01:45,  3.51s/it][A
 31%|███       | 13/42 [00:46<01:42,  3.52s/it][A
 33%|███▎      | 14/42 [00:49<01:38,  3.50s/it][A
 36%|███▌      | 15/42 [00:53<01:34,  3.50s/it][A
 38%|███▊      | 16/42 [00:56<01:31,  3.52s/it][A
 40%|████      | 17/42 [01:00<01:27,  3.50s/it][A
 43%|████▎     | 18/42 [01:03<01:24,  3.51s/it][A
 45%|████▌     | 19/42 [01:07<01:20,  3.51s/it][A
 48%|████▊     | 20/42 [01:10<01:17,  3


	Validation loss: 0.5801786688112077

	Validation acc: 0.6202876984126984

	Validation prec: 0.8446590315195364

	Validation rec: 0.6202876984126984

	Validation f1: 0.6808587773163859





In [21]:
# train_loss.numpy()
# train_loss


In [22]:
# # Saving models
# torch.save(model.state_dict(), "finetuned.pth")

# #load models
# model = Neural

In [23]:
## saving the model 
torch.save(model.state_dict(), "finetuned-35-epochs-with-weighted-loss_lr_5e5.pth")

In [24]:
# loading the locally saved model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


model = CausalityBERT()
model.load_state_dict(torch.load("finetuned-35-epochs-with-weighted-loss_lr_5e5.pth"))
## Move the model to the GPU 
model.to(device)
model.eval() # gettign in the eval mode 



You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.0.output.dense.weight', 'roberta.embeddings.token_type_embeddings.weight', 'roberta.encoder.layer.9.output.dense.weight', 'roberta.encoder.layer.5.attention.self.key.weight', 'roberta.encoder.layer.7.attention.self.query.bias', 'roberta.encoder.layer.0.attention.self.key.bias', 'roberta.encoder.layer.7.intermediate.dense.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.6.attention.self.key.weight', 'lm_head.bias', 'roberta.encoder.layer.6.attention.self.query.bias', 'roberta.encoder.layer.11.attention.self.query.weight', 'roberta.embeddings.LayerNorm.weight', 'roberta.encoder.layer.4.intermediate.dense.weight', 'roberta.encoder.layer.0.attention.output.dense.bias', 'robe

CausalityBERT(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(64001, 768, padding_idx=1)
      (position_embeddings): Embedding(130, 768)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True

## evaluation on the test dataset 



In [25]:
## evaluation on the test dataset 

# test_accuracy, test_mcc_accuracy, nb_test_steps = 0, 0, 0 
# test_mcc_accuracy, nb_test_steps = 0, 0,

# test_accuracy = []
# test_loss = []
# test_acc = []
# test_prec = []
# test_rec = []
# test_f1 = []

# for batch in tqdm(test_loader):
#     batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
#     b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
#     with torch.no_grad():
#         model.eval()
#         logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
    
    
#     loss = loss_fn(logits, b_labels)
#     test_loss.append(loss.item())

#     # move logits and labels to CPU
#     logits = logits.detach().to('cpu').numpy()
#     label_ids = b_labels.to('cpu').numpy()

#     pred_flat = np.argmax(logits, axis=1).flatten()
#     labels_flat = label_ids.flatten()

# #     eval_accuracy += accuracy_score(labels_flat, pred_flat)
# #     test_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  


#     metrics = compute_metrics(pred_flat, labels_flat)
#     test_acc.append(metrics["accuracy"])
#     test_prec.append(metrics["precision"])
#     test_rec.append(metrics["recall"])
#     test_f1.append(metrics["f1"])
# #     nb_test_steps += 1
    

# # print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
# # print(F'\n\ttest MCC Accuracy: {test_mcc_accuracy / nb_test_steps}') # eval M
# print(F'\n\ttest loss: {np.mean(test_loss)}')
# print(F'\n\ttest acc: {np.mean(test_acc)}')
# print(F'\n\ttest prec: {np.mean(test_prec)}')
# print(F'\n\ttest rec: {np.mean(test_rec)}')
# print(F'\n\ttest f1: {np.mean(test_f1)}')

In [26]:
loss_fn = CrossEntropyLoss()
test_loss = []
test_acc = []
test_prec = []
test_rec = []
test_f1 = []


for batch in tqdm(test_loader):
    batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
    b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader

    with torch.no_grad():
        model.eval()
        logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
    
    print("logits:")
    print(logits)
    # move logits and labels to CPU
    logits = logits.detach().to('cpu').numpy()
    label_ids = b_labels.to('cpu').numpy()

    pred_flat = np.argmax(logits, axis=1).flatten()
    labels_flat = label_ids.flatten()
    print("pred:", pred_flat)
    print("real:", labels_flat)
    
    metrics = compute_metrics(pred_flat, labels_flat)
    test_acc.append(metrics["accuracy"])
    test_prec.append(metrics["precision"])
    test_rec.append(metrics["recall"])
    test_f1.append(metrics["f1"])

    
print(F'\n\ttest loss: {np.mean(test_loss)}')
print(F'\n\ttest acc: {np.mean(test_acc)}')
print(F'\n\ttest prec: {np.mean(test_prec)}')
print(F'\n\ttest rec: {np.mean(test_rec)}')
print(F'\n\ttest f1: {np.mean(test_f1)}')





  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
  2%|▏         | 1/52 [00:04<03:49,  4.51s/it]

logits:
tensor([[ 0.4904, -0.2755],
        [-0.1013,  0.4080],
        [ 0.3521, -0.2036],
        [-0.3749,  0.6805],
        [ 0.8254, -0.6578],
        [ 0.7953, -0.6137],
        [ 1.2399, -1.0747],
        [ 0.9515, -0.8847],
        [-0.1822,  0.5042],
        [ 0.1704,  0.0170],
        [ 0.7373, -0.5463],
        [-0.4168,  0.7147],
        [ 1.0940, -0.9389],
        [ 0.6103, -0.4381],
        [-0.4203,  0.7287],
        [ 1.1089, -0.9114],
        [ 0.1519,  0.1079],
        [ 0.0884,  0.1384],
        [ 0.8410, -0.6802],
        [-0.3653,  0.6362],
        [ 0.0813,  0.1463],
        [ 1.0591, -0.8933],
        [-0.3605,  0.6379],
        [-0.0293,  0.3212],
        [ 0.1037,  0.1493],
        [ 0.5919, -0.3467],
        [-0.3024,  0.5542],
        [ 0.0366,  0.2134],
        [ 0.7869, -0.5544],
        [ 0.0806,  0.1845],
        [ 0.3470, -0.1904],
        [-0.2136,  0.4390]], device='cuda:0')
pred: [0 1 0 1 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 1 1 0 1 1 1 0 1 1 0 1 0 1]
real: 

  4%|▍         | 2/52 [00:09<03:45,  4.51s/it]

logits:
tensor([[ 0.8242, -0.6847],
        [ 0.1357,  0.1073],
        [ 0.9094, -0.7099],
        [ 0.0940,  0.1681],
        [-0.0374,  0.3418],
        [ 1.0025, -0.8492],
        [ 0.8681, -0.6322],
        [ 0.8996, -0.7066],
        [ 0.5499, -0.3393],
        [-0.0636,  0.4098],
        [ 0.9420, -0.8431],
        [-0.4836,  0.7619],
        [ 0.1729,  0.0881],
        [-0.0044,  0.2906],
        [ 0.1902,  0.0671],
        [ 1.2415, -1.0608],
        [ 0.0604,  0.1817],
        [ 0.6077, -0.4325],
        [ 0.0063,  0.2267],
        [-0.1007,  0.3567],
        [ 0.8361, -0.6892],
        [ 0.7613, -0.6424],
        [ 0.3756, -0.2493],
        [ 0.0450,  0.2268],
        [-0.3300,  0.6435],
        [ 0.4876, -0.3139],
        [-0.4127,  0.7006],
        [ 0.9406, -0.7156],
        [ 0.1544,  0.0218],
        [-0.1705,  0.4815],
        [-0.1925,  0.5065],
        [-0.1105,  0.3976]], device='cuda:0')
pred: [0 0 0 1 1 0 0 0 0 1 0 1 0 1 0 0 1 0 1 1 0 0 0 1 1 0 1 0 0 1 1 1]
real: 

  6%|▌         | 3/52 [00:13<03:38,  4.47s/it]

logits:
tensor([[ 0.3999, -0.1835],
        [-0.1001,  0.3750],
        [-0.3845,  0.6616],
        [-0.0208,  0.2325],
        [-0.4006,  0.6751],
        [-0.3937,  0.6471],
        [ 0.6721, -0.4554],
        [ 0.0377,  0.2195],
        [ 1.1925, -0.9532],
        [ 0.8004, -0.6100],
        [ 0.5199, -0.3692],
        [ 0.8865, -0.7058],
        [ 0.7527, -0.6033],
        [ 0.4164, -0.3353],
        [ 0.2359,  0.0259],
        [ 1.3209, -1.1923],
        [-0.1676,  0.4243],
        [-0.0310,  0.3184],
        [-0.2397,  0.4678],
        [-0.1942,  0.4401],
        [-0.3086,  0.6007],
        [ 0.7876, -0.6345],
        [ 0.9560, -0.7706],
        [ 0.2651, -0.0910],
        [-0.3918,  0.6885],
        [ 0.3855, -0.1791],
        [ 0.9005, -0.7961],
        [-0.1824,  0.5061],
        [ 0.4539, -0.2592],
        [ 0.5628, -0.3756],
        [ 0.0803,  0.2205],
        [ 0.8784, -0.7871]], device='cuda:0')
pred: [0 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 1 0 0 1 0 0 1 0]
real: 

  8%|▊         | 4/52 [00:17<03:33,  4.45s/it]

logits:
tensor([[ 0.8131, -0.6403],
        [ 0.1542,  0.0434],
        [-0.0134,  0.2800],
        [ 0.6681, -0.5114],
        [ 0.6690, -0.5050],
        [-0.3008,  0.6093],
        [ 0.1933,  0.0769],
        [-0.1492,  0.4101],
        [ 0.7984, -0.6380],
        [ 0.1569,  0.0825],
        [-0.2324,  0.5376],
        [-0.3400,  0.6242],
        [-0.3377,  0.6455],
        [ 0.0130,  0.2724],
        [-0.2028,  0.5424],
        [ 0.4560, -0.2459],
        [ 0.6465, -0.4579],
        [ 0.2128,  0.0406],
        [-0.0881,  0.3177],
        [ 0.9077, -0.6765],
        [ 0.9043, -0.7296],
        [ 0.1870,  0.0484],
        [-0.0543,  0.3809],
        [-0.2133,  0.4699],
        [ 0.7692, -0.6268],
        [ 0.1611,  0.0389],
        [ 1.0232, -0.8037],
        [-0.3280,  0.6015],
        [-0.2720,  0.5022],
        [-0.3645,  0.6248],
        [ 0.2351, -0.0514],
        [-0.2467,  0.5496]], device='cuda:0')
pred: [0 0 1 0 0 1 0 1 0 0 1 1 1 1 1 0 0 0 1 0 0 0 1 1 0 0 0 1 1 1 0 1]
real: 

 10%|▉         | 5/52 [00:22<03:29,  4.46s/it]

logits:
tensor([[ 0.7351, -0.5753],
        [ 0.2663, -0.0728],
        [ 0.1103,  0.0943],
        [ 0.6147, -0.3840],
        [-0.0661,  0.3872],
        [ 0.3449, -0.1522],
        [ 0.3346, -0.1291],
        [ 0.9750, -0.8371],
        [-0.1704,  0.4510],
        [-0.3793,  0.6403],
        [-0.0123,  0.2955],
        [ 1.0670, -0.8859],
        [ 0.4829, -0.3217],
        [ 0.1439,  0.1338],
        [ 1.0733, -0.9230],
        [ 1.0273, -0.9164],
        [-0.3285,  0.6058],
        [ 0.7706, -0.5872],
        [ 0.6353, -0.3913],
        [ 0.4200, -0.2009],
        [-0.2379,  0.5373],
        [ 0.8830, -0.7400],
        [ 0.4572, -0.2092],
        [ 0.4414, -0.2697],
        [-0.3216,  0.5923],
        [ 0.7897, -0.5740],
        [ 0.4720, -0.3318],
        [-0.3108,  0.5707],
        [ 0.1844,  0.0999],
        [ 0.5905, -0.4537],
        [ 0.5876, -0.4080],
        [ 0.0114,  0.2873]], device='cuda:0')
pred: [0 0 0 0 1 0 0 0 1 1 1 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 1 0 0 0 1]
real: 

 12%|█▏        | 6/52 [00:26<03:24,  4.45s/it]

logits:
tensor([[-0.0267,  0.3375],
        [ 1.0128, -0.9031],
        [ 0.6161, -0.4457],
        [ 0.5716, -0.3306],
        [-0.1856,  0.4463],
        [ 1.1027, -0.9414],
        [ 1.2793, -1.1696],
        [ 0.5503, -0.3875],
        [ 0.2847, -0.0876],
        [ 0.6504, -0.4647],
        [ 0.0755,  0.1907],
        [ 0.9845, -0.8779],
        [ 0.1901,  0.1064],
        [ 0.5040, -0.3077],
        [ 0.4947, -0.2526],
        [ 0.1582,  0.1246],
        [ 0.5152, -0.3758],
        [ 0.1693,  0.0419],
        [-0.1753,  0.4806],
        [-0.2069,  0.4849],
        [-0.3619,  0.6819],
        [-0.2979,  0.6034],
        [-0.1541,  0.4499],
        [-0.0122,  0.2916],
        [ 0.3012, -0.0871],
        [-0.2220,  0.4876],
        [ 0.5196, -0.3095],
        [ 1.0856, -0.9268],
        [ 0.0718,  0.1361],
        [ 0.0703,  0.1098],
        [ 0.3568, -0.1690],
        [ 0.0190,  0.2394]], device='cuda:0')
pred: [1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 0 1 1 0 1]
real: 

 13%|█▎        | 7/52 [00:31<03:20,  4.45s/it]

logits:
tensor([[ 0.5947, -0.4207],
        [-0.2145,  0.4896],
        [ 1.0533, -0.9485],
        [ 0.7858, -0.5511],
        [-0.1349,  0.4252],
        [ 0.0573,  0.1892],
        [ 0.9968, -0.8056],
        [-0.2088,  0.4983],
        [ 0.3352, -0.1219],
        [-0.1989,  0.5307],
        [-0.2825,  0.5488],
        [-0.1178,  0.4379],
        [ 0.5487, -0.3470],
        [ 0.2102,  0.0174],
        [ 0.1465,  0.1240],
        [-0.0994,  0.3603],
        [ 0.9828, -0.8518],
        [-0.0754,  0.3853],
        [-0.2806,  0.5871],
        [-0.2738,  0.5794],
        [ 0.8176, -0.6461],
        [-0.1760,  0.4964],
        [ 1.1709, -1.0278],
        [ 0.2009, -0.0197],
        [ 0.4590, -0.3122],
        [ 0.2983, -0.0714],
        [ 0.9823, -0.8451],
        [-0.3170,  0.6452],
        [-0.3034,  0.6001],
        [ 0.8560, -0.7044],
        [ 0.4468, -0.2517],
        [-0.1148,  0.3870]], device='cuda:0')
pred: [0 1 0 0 1 1 0 1 0 1 1 1 0 0 0 1 0 1 1 1 0 1 0 0 0 0 0 1 1 0 0 1]
real: 

 15%|█▌        | 8/52 [00:35<03:15,  4.44s/it]

logits:
tensor([[ 0.1520,  0.0550],
        [-0.0853,  0.3487],
        [ 1.2506, -1.1956],
        [ 0.7164, -0.4940],
        [-0.3745,  0.7057],
        [ 0.4749, -0.2672],
        [ 0.0756,  0.1928],
        [-0.3495,  0.6485],
        [-0.3065,  0.5662],
        [-0.1990,  0.4734],
        [ 0.4887, -0.2534],
        [ 1.2042, -1.1035],
        [ 0.5170, -0.3503],
        [-0.4744,  0.7296],
        [ 0.5137, -0.3192],
        [ 0.4951, -0.3173],
        [-0.3231,  0.5995],
        [ 0.9644, -0.8017],
        [ 0.0642,  0.2430],
        [-0.3522,  0.6350],
        [-0.2942,  0.5923],
        [-0.3184,  0.5636],
        [ 0.9794, -0.9074],
        [ 0.7982, -0.6422],
        [ 0.0183,  0.2564],
        [-0.0269,  0.3213],
        [ 0.6580, -0.4365],
        [-0.3678,  0.6396],
        [ 0.4951, -0.2570],
        [ 0.1892,  0.1345],
        [-0.3040,  0.5818],
        [ 0.2642,  0.0190]], device='cuda:0')
pred: [0 1 0 0 1 0 1 1 1 1 0 0 0 1 0 0 1 0 1 1 1 1 0 0 1 1 0 1 0 0 1 0]
real: 

 17%|█▋        | 9/52 [00:40<03:10,  4.44s/it]

logits:
tensor([[ 0.4766, -0.3468],
        [ 0.6879, -0.5262],
        [ 0.7506, -0.5446],
        [-0.2371,  0.5402],
        [-0.2384,  0.4900],
        [-0.1724,  0.4605],
        [ 0.7233, -0.5255],
        [-0.1891,  0.4734],
        [-0.4729,  0.7769],
        [-0.0221,  0.3301],
        [ 0.7246, -0.5831],
        [-0.3324,  0.5954],
        [ 0.1440,  0.1148],
        [ 0.0284,  0.1905],
        [-0.0467,  0.2690],
        [ 0.3697, -0.0832],
        [ 0.3898, -0.1612],
        [-0.1214,  0.3795],
        [ 0.0403,  0.2177],
        [ 0.5710, -0.3409],
        [-0.3807,  0.7246],
        [-0.2432,  0.5178],
        [ 0.4489, -0.1825],
        [-0.2591,  0.5389],
        [-0.0873,  0.3908],
        [-0.3066,  0.6131],
        [ 0.4476, -0.2285],
        [ 0.9783, -0.7853],
        [ 0.1311,  0.1469],
        [ 0.0054,  0.2711],
        [-0.2207,  0.4675],
        [ 0.9067, -0.7273]], device='cuda:0')
pred: [0 0 0 1 1 1 0 1 1 1 0 1 0 1 1 0 0 1 1 0 1 1 0 1 1 1 0 0 1 1 1 0]
real: 

 19%|█▉        | 10/52 [00:44<03:07,  4.48s/it]

logits:
tensor([[ 0.1975,  0.0544],
        [-0.1496,  0.4748],
        [-0.1700,  0.4823],
        [ 0.2660,  0.0052],
        [-0.0245,  0.2946],
        [ 0.1424,  0.0506],
        [-0.2268,  0.5271],
        [-0.3089,  0.5621],
        [ 0.4438, -0.2121],
        [-0.3198,  0.6016],
        [-0.0222,  0.2867],
        [-0.1851,  0.4631],
        [ 0.7225, -0.4872],
        [-0.2319,  0.5221],
        [ 0.5248, -0.3111],
        [-0.0794,  0.3590],
        [-0.3041,  0.5749],
        [-0.1352,  0.4796],
        [-0.0406,  0.3534],
        [ 0.8993, -0.7346],
        [ 0.7710, -0.6287],
        [ 0.4483, -0.1759],
        [ 0.6271, -0.4305],
        [ 0.8837, -0.6875],
        [-0.3348,  0.6145],
        [-0.1794,  0.4507],
        [ 0.0257,  0.2863],
        [-0.0693,  0.3127],
        [ 0.3653, -0.1801],
        [-0.2330,  0.5428],
        [-0.0805,  0.3601],
        [-0.3344,  0.5913]], device='cuda:0')
pred: [0 1 1 0 1 0 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 0 0 1 1 1 1 0 1 1 1]
real: 

 21%|██        | 11/52 [00:49<03:03,  4.47s/it]

logits:
tensor([[-0.2776,  0.5873],
        [ 0.7659, -0.6439],
        [ 0.2187,  0.0285],
        [ 0.2276, -0.0030],
        [ 0.3177, -0.1181],
        [ 0.2620, -0.0507],
        [ 0.7148, -0.5471],
        [ 0.9660, -0.8065],
        [ 1.1272, -0.9708],
        [-0.1386,  0.4381],
        [-0.0426,  0.3482],
        [ 0.8352, -0.6349],
        [-0.4387,  0.7317],
        [-0.0453,  0.3378],
        [ 0.3950, -0.2036],
        [ 1.3158, -1.2000],
        [-0.3137,  0.5839],
        [-0.3461,  0.6244],
        [ 0.0433,  0.1844],
        [-0.0942,  0.3731],
        [ 0.2209,  0.0827],
        [-0.1630,  0.4866],
        [ 0.6311, -0.5074],
        [-0.2579,  0.5385],
        [-0.2279,  0.5281],
        [ 0.2966, -0.1535],
        [ 0.8574, -0.6488],
        [ 0.7654, -0.6210],
        [-0.0438,  0.3559],
        [-0.0725,  0.3050],
        [-0.1146,  0.4340],
        [ 0.0090,  0.2557]], device='cuda:0')
pred: [1 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 1 1 1 1 0 1 0 1 1 0 0 0 1 1 1 1]
real: 

 23%|██▎       | 12/52 [00:53<02:58,  4.46s/it]

logits:
tensor([[ 1.0294, -0.8482],
        [ 1.1548, -0.9433],
        [ 1.0759, -0.9686],
        [ 0.2587,  0.0208],
        [ 0.6980, -0.4776],
        [ 0.1535,  0.0847],
        [-0.2034,  0.4537],
        [ 0.0811,  0.1302],
        [ 0.8604, -0.7335],
        [ 0.7022, -0.4965],
        [ 0.3155, -0.0818],
        [-0.3318,  0.6129],
        [-0.0924,  0.3786],
        [ 0.6787, -0.4818],
        [-0.1119,  0.3762],
        [ 0.9061, -0.7409],
        [-0.0566,  0.3147],
        [ 0.1195,  0.1135],
        [ 0.6051, -0.3933],
        [-0.1570,  0.4772],
        [ 0.3452, -0.0873],
        [-0.0121,  0.2365],
        [ 0.8048, -0.6805],
        [ 0.5595, -0.3997],
        [-0.3418,  0.6670],
        [ 0.4446, -0.2867],
        [ 0.8142, -0.6510],
        [ 0.5482, -0.4063],
        [-0.2945,  0.6275],
        [ 0.9466, -0.8443],
        [ 0.5518, -0.4082],
        [ 1.0022, -0.8773]], device='cuda:0')
pred: [0 0 0 0 0 0 1 1 0 0 0 1 1 0 1 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0 0 0]
real: 

 25%|██▌       | 13/52 [00:58<02:54,  4.47s/it]

logits:
tensor([[-0.1913,  0.4727],
        [-0.2031,  0.5358],
        [ 0.8858, -0.7396],
        [-0.1893,  0.4458],
        [ 0.5131, -0.2987],
        [ 0.2309,  0.0503],
        [ 0.8712, -0.6958],
        [-0.2328,  0.5285],
        [ 0.8159, -0.5922],
        [ 0.3131, -0.1081],
        [ 1.1963, -1.0663],
        [ 0.9436, -0.7818],
        [-0.4821,  0.7547],
        [ 0.1522,  0.0486],
        [ 0.7569, -0.5999],
        [-0.1742,  0.4741],
        [ 1.0234, -0.8720],
        [ 0.5016, -0.2767],
        [-0.3120,  0.5812],
        [ 1.0632, -0.8121],
        [-0.0623,  0.3626],
        [ 1.1186, -1.0046],
        [-0.0700,  0.3424],
        [ 0.1164,  0.1890],
        [ 0.7277, -0.4966],
        [ 0.5750, -0.3171],
        [ 0.1773,  0.0693],
        [ 1.2358, -1.1114],
        [ 0.8509, -0.6571],
        [-0.0096,  0.2950],
        [ 0.1292,  0.0773],
        [ 0.8749, -0.6653]], device='cuda:0')
pred: [1 1 0 1 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0]
real: 

 27%|██▋       | 14/52 [01:02<02:49,  4.45s/it]

logits:
tensor([[-0.3049,  0.5821],
        [ 0.3980, -0.1308],
        [-0.2758,  0.5611],
        [ 0.1097,  0.0846],
        [ 0.1075,  0.1621],
        [-0.1577,  0.4637],
        [ 0.5110, -0.3107],
        [-0.0228,  0.2506],
        [ 1.1586, -0.9585],
        [-0.1476,  0.3986],
        [-0.1426,  0.4428],
        [ 0.5434, -0.3302],
        [-0.2242,  0.5361],
        [ 1.0394, -0.9587],
        [-0.2181,  0.5189],
        [-0.0964,  0.3901],
        [ 0.5421, -0.3163],
        [-0.1904,  0.4766],
        [ 0.1190,  0.1823],
        [ 0.7603, -0.6002],
        [-0.0104,  0.2349],
        [ 0.0814,  0.1165],
        [-0.2274,  0.4810],
        [-0.3339,  0.6389],
        [-0.2767,  0.5611],
        [-0.2219,  0.4187],
        [-0.1222,  0.4280],
        [ 0.3569, -0.0612],
        [ 0.5493, -0.4137],
        [ 0.4731, -0.2785],
        [ 0.0444,  0.2373],
        [-0.3135,  0.5640]], device='cuda:0')
pred: [1 0 1 0 1 1 0 1 0 1 1 0 1 0 1 1 0 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1]
real: 

 29%|██▉       | 15/52 [01:06<02:45,  4.46s/it]

logits:
tensor([[ 0.0466,  0.1237],
        [-0.0875,  0.4289],
        [ 0.5429, -0.3432],
        [ 1.1831, -1.0150],
        [ 0.9280, -0.8305],
        [ 0.2338,  0.0529],
        [ 0.7618, -0.5315],
        [ 1.0162, -0.8644],
        [ 0.9398, -0.7310],
        [ 0.3420, -0.1522],
        [ 0.8718, -0.7715],
        [ 1.0308, -0.8590],
        [-0.3788,  0.6377],
        [ 0.4363, -0.2634],
        [-0.1100,  0.4048],
        [ 0.3512, -0.1332],
        [ 0.7066, -0.5541],
        [-0.2043,  0.5462],
        [ 0.5236, -0.2796],
        [ 1.0287, -0.9366],
        [ 0.5981, -0.4378],
        [-0.1583,  0.5239],
        [ 0.2304, -0.0644],
        [-0.2571,  0.5977],
        [ 0.9940, -0.8448],
        [-0.1202,  0.3624],
        [-0.0126,  0.2295],
        [-0.1012,  0.4025],
        [-0.1637,  0.4689],
        [-0.3532,  0.6018],
        [-0.0149,  0.2872],
        [ 0.0888,  0.1804]], device='cuda:0')
pred: [1 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 1 0 1 1 1 1 1 1 1]
real: 

 31%|███       | 16/52 [01:11<02:40,  4.45s/it]

logits:
tensor([[-0.3055,  0.6341],
        [-0.3237,  0.6037],
        [ 0.1972,  0.0494],
        [ 0.5387, -0.3984],
        [ 1.0490, -0.9433],
        [-0.0229,  0.3240],
        [ 0.7593, -0.5732],
        [ 0.3833, -0.1331],
        [-0.2317,  0.5376],
        [ 0.8458, -0.7002],
        [ 1.0381, -0.8758],
        [ 0.0436,  0.2189],
        [ 0.3525, -0.0448],
        [ 0.2605, -0.0550],
        [-0.0907,  0.3828],
        [ 0.7918, -0.6621],
        [ 0.7140, -0.5236],
        [-0.3192,  0.6508],
        [-0.3931,  0.7064],
        [ 0.7227, -0.4889],
        [ 1.1220, -1.0452],
        [ 0.1781,  0.0188],
        [ 0.7743, -0.5547],
        [-0.3026,  0.5645],
        [ 1.1948, -1.0927],
        [ 0.7731, -0.5891],
        [ 0.9534, -0.7866],
        [ 0.9204, -0.7365],
        [-0.2358,  0.5739],
        [ 0.6515, -0.4895],
        [-0.2228,  0.5047],
        [ 1.1957, -1.0428]], device='cuda:0')
pred: [1 1 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0]
real: 

 33%|███▎      | 17/52 [01:15<02:35,  4.45s/it]

logits:
tensor([[-1.4023e-01,  3.9728e-01],
        [ 5.4934e-01, -3.1390e-01],
        [ 6.2170e-01, -4.3471e-01],
        [ 1.2604e-01,  1.2456e-01],
        [ 1.0795e+00, -9.6014e-01],
        [-2.8792e-01,  5.6161e-01],
        [ 6.7054e-01, -4.5814e-01],
        [-3.1515e-01,  5.8621e-01],
        [ 8.6748e-01, -7.1402e-01],
        [ 6.7542e-02,  1.4885e-01],
        [-2.8554e-01,  5.5744e-01],
        [-2.3085e-01,  4.8761e-01],
        [ 1.1434e+00, -1.0640e+00],
        [-4.3334e-01,  7.3914e-01],
        [ 6.6318e-01, -4.6385e-01],
        [ 2.4590e-01, -3.2829e-02],
        [ 2.3054e-01, -7.1652e-04],
        [-1.2583e-01,  4.1355e-01],
        [-1.5377e-02,  3.4043e-01],
        [-1.5876e-01,  4.4431e-01],
        [ 4.7889e-01, -3.0075e-01],
        [-5.4096e-02,  3.2529e-01],
        [ 2.3426e-01, -1.4630e-02],
        [ 5.4101e-01, -3.3381e-01],
        [-3.2755e-01,  5.9093e-01],
        [-2.5380e-01,  5.8181e-01],
        [ 3.8553e-02,  2.4288e-01],
        [-2.6737e-01

 35%|███▍      | 18/52 [01:20<02:31,  4.47s/it]

logits:
tensor([[ 0.5315, -0.3450],
        [ 0.4382, -0.3223],
        [ 0.6501, -0.4852],
        [-0.1359,  0.3888],
        [-0.3259,  0.5755],
        [ 0.2489, -0.0474],
        [-0.1929,  0.4913],
        [ 0.7204, -0.6022],
        [-0.1922,  0.4875],
        [ 0.1605,  0.0317],
        [ 0.3847, -0.2246],
        [ 0.0383,  0.2943],
        [ 0.5607, -0.4102],
        [ 1.0395, -0.9272],
        [ 0.9926, -0.8469],
        [-0.3770,  0.6836],
        [ 0.8805, -0.7281],
        [ 1.1032, -0.8860],
        [ 1.0170, -0.9212],
        [ 0.0224,  0.1849],
        [-0.3055,  0.5743],
        [-0.3412,  0.6075],
        [ 0.0169,  0.2057],
        [ 0.9553, -0.8223],
        [-0.2373,  0.4958],
        [ 0.8235, -0.6514],
        [ 0.2458, -0.0456],
        [-0.1525,  0.4624],
        [ 0.0561,  0.2073],
        [-0.3210,  0.6196],
        [-0.1232,  0.4023],
        [ 0.8285, -0.6498]], device='cuda:0')
pred: [0 0 0 1 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0]
real: 

 37%|███▋      | 19/52 [01:24<02:26,  4.45s/it]

logits:
tensor([[ 1.0720, -0.8893],
        [-0.2209,  0.5166],
        [ 0.7547, -0.5434],
        [-0.1029,  0.4621],
        [ 1.2523, -1.1802],
        [-0.1515,  0.4463],
        [ 0.0027,  0.1944],
        [ 0.9902, -0.8547],
        [-0.1433,  0.4240],
        [ 0.3276, -0.1412],
        [ 0.4902, -0.3205],
        [ 0.0725,  0.1601],
        [ 1.2866, -1.1143],
        [ 0.6772, -0.4890],
        [ 0.0632,  0.1926],
        [ 0.5589, -0.3611],
        [-0.0235,  0.3241],
        [-0.2343,  0.4904],
        [ 0.0134,  0.2211],
        [ 0.4745, -0.2999],
        [ 0.2983, -0.0929],
        [ 0.9154, -0.8216],
        [-0.2756,  0.5360],
        [-0.2396,  0.5230],
        [ 0.1242,  0.0875],
        [-0.0965,  0.3117],
        [ 0.0747,  0.1968],
        [ 0.2816, -0.0792],
        [ 0.7120, -0.5648],
        [ 0.5453, -0.3346],
        [ 0.4796, -0.2750],
        [-0.4551,  0.7266]], device='cuda:0')
pred: [0 1 0 1 0 1 1 0 1 0 0 1 0 0 1 0 1 1 1 0 0 0 1 1 0 1 1 0 0 0 0 1]
real: 

 38%|███▊      | 20/52 [01:29<02:22,  4.44s/it]

logits:
tensor([[-0.2345,  0.5340],
        [ 0.2150, -0.0019],
        [ 0.9008, -0.6779],
        [-0.2102,  0.4658],
        [ 0.5922, -0.4064],
        [ 0.8332, -0.7294],
        [ 0.5054, -0.2970],
        [-0.4336,  0.7395],
        [-0.3829,  0.6643],
        [ 1.2259, -1.0546],
        [ 0.6702, -0.5299],
        [-0.4040,  0.6992],
        [ 0.1225,  0.1875],
        [-0.2857,  0.5927],
        [-0.0015,  0.2701],
        [ 1.0893, -0.9392],
        [-0.2530,  0.5775],
        [ 0.9027, -0.7243],
        [ 0.0958,  0.1403],
        [ 0.6023, -0.4324],
        [ 0.1696,  0.0735],
        [ 0.7043, -0.5336],
        [ 0.1248,  0.1688],
        [ 0.4184, -0.2093],
        [ 0.1229,  0.1326],
        [-0.1444,  0.4463],
        [-0.4440,  0.7301],
        [ 0.2458,  0.0291],
        [ 0.1658,  0.0489],
        [ 0.6468, -0.4950],
        [ 0.0797,  0.1630],
        [-0.1092,  0.4134]], device='cuda:0')
pred: [1 0 0 1 0 0 0 1 1 0 0 1 1 1 1 0 1 0 1 0 0 0 1 0 1 1 1 0 0 0 1 1]
real: 

 40%|████      | 21/52 [01:33<02:17,  4.45s/it]

logits:
tensor([[ 0.7480, -0.6273],
        [-0.2244,  0.5629],
        [ 0.8035, -0.6873],
        [-0.1089,  0.3608],
        [-0.0752,  0.3765],
        [-0.1820,  0.4391],
        [-0.2247,  0.4930],
        [ 0.0453,  0.1872],
        [ 0.2389, -0.0310],
        [ 0.0072,  0.2936],
        [-0.2581,  0.4847],
        [-0.2457,  0.5615],
        [-0.1042,  0.3928],
        [ 0.0277,  0.2503],
        [ 0.8325, -0.6817],
        [ 0.8982, -0.7179],
        [ 0.1243,  0.1528],
        [-0.2846,  0.6556],
        [-0.0245,  0.3463],
        [ 0.5114, -0.3347],
        [ 0.4175, -0.1408],
        [-0.0251,  0.2644],
        [-0.2843,  0.5709],
        [-0.3998,  0.6834],
        [-0.3243,  0.5824],
        [-0.0882,  0.4136],
        [ 0.3010, -0.1248],
        [ 0.9959, -0.8334],
        [-0.1096,  0.3920],
        [ 0.8018, -0.6225],
        [ 0.4322, -0.2007],
        [ 0.5879, -0.3672]], device='cuda:0')
pred: [0 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 0 0 1 1 1 1 1 0 0 1 0 0 0]
real: 

 42%|████▏     | 22/52 [01:38<02:13,  4.44s/it]

logits:
tensor([[ 0.6641, -0.5538],
        [ 0.4685, -0.2604],
        [ 0.4678, -0.2488],
        [ 0.7476, -0.5859],
        [ 1.0743, -0.9193],
        [-0.2163,  0.4750],
        [-0.3116,  0.6353],
        [ 1.1252, -0.9935],
        [ 0.1017,  0.1375],
        [-0.2506,  0.5531],
        [-0.3990,  0.7123],
        [-0.3130,  0.6377],
        [ 0.9596, -0.7414],
        [ 0.1450,  0.1528],
        [ 1.1121, -0.9944],
        [-0.4020,  0.6837],
        [-0.0322,  0.2801],
        [ 0.5500, -0.4406],
        [-0.3705,  0.6821],
        [-0.3265,  0.6288],
        [-0.0078,  0.3324],
        [ 0.7983, -0.6493],
        [ 0.2580, -0.0076],
        [-0.1340,  0.4186],
        [-0.3153,  0.6247],
        [ 0.4025, -0.2086],
        [-0.2101,  0.4737],
        [-0.2643,  0.5781],
        [ 0.8930, -0.7133],
        [ 0.9728, -0.7751],
        [ 0.2845, -0.0838],
        [ 1.1091, -1.0126]], device='cuda:0')
pred: [0 0 0 0 0 1 1 0 1 1 1 1 0 1 0 1 1 0 1 1 1 0 0 1 1 0 1 1 0 0 0 0]
real: 

 44%|████▍     | 23/52 [01:42<02:08,  4.44s/it]

logits:
tensor([[ 0.6504, -0.4713],
        [-0.2794,  0.5439],
        [-0.1327,  0.4087],
        [-0.1716,  0.4941],
        [-0.0604,  0.3811],
        [ 0.1827,  0.0158],
        [ 0.4475, -0.2352],
        [ 0.9803, -0.8815],
        [ 0.2745, -0.0515],
        [ 0.6267, -0.4606],
        [ 0.1727,  0.0346],
        [-0.3169,  0.6160],
        [ 0.3930, -0.2140],
        [ 0.9705, -0.7549],
        [ 0.1506,  0.0797],
        [ 1.1517, -1.0434],
        [-0.2040,  0.4551],
        [-0.3824,  0.7150],
        [ 0.5824, -0.3965],
        [ 0.1267,  0.1145],
        [-0.0795,  0.3279],
        [-0.3922,  0.6479],
        [ 1.0946, -0.8817],
        [-0.3100,  0.6396],
        [-0.0876,  0.3727],
        [ 0.4182, -0.2189],
        [ 0.3166, -0.1036],
        [ 0.4423, -0.2084],
        [-0.2061,  0.4956],
        [ 0.9245, -0.7335],
        [ 0.5220, -0.3773],
        [-0.2517,  0.5999]], device='cuda:0')
pred: [0 1 1 1 1 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 1 0 1 1 0 0 0 1 0 0 1]
real: 

 46%|████▌     | 24/52 [01:46<02:04,  4.45s/it]

logits:
tensor([[ 0.0996,  0.2363],
        [ 0.1399,  0.0622],
        [ 0.8307, -0.7481],
        [ 0.7287, -0.4950],
        [-0.2861,  0.6060],
        [ 0.9979, -0.8622],
        [-0.3622,  0.6457],
        [-0.1818,  0.4647],
        [-0.2334,  0.5500],
        [ 0.3965, -0.2124],
        [ 0.6333, -0.4981],
        [-0.3448,  0.6213],
        [-0.0333,  0.3325],
        [ 0.8486, -0.6305],
        [ 1.2490, -1.1932],
        [-0.0545,  0.2913],
        [-0.1167,  0.4232],
        [-0.1774,  0.4482],
        [ 0.6733, -0.4973],
        [ 0.4135, -0.1640],
        [ 0.6648, -0.5217],
        [-0.2291,  0.5543],
        [ 0.4226, -0.2382],
        [-0.1021,  0.3792],
        [ 1.0061, -0.7981],
        [ 0.6069, -0.4138],
        [ 0.4029, -0.1779],
        [ 0.4251, -0.2397],
        [-0.2354,  0.4825],
        [-0.3822,  0.6887],
        [ 0.6310, -0.4710],
        [ 0.2299,  0.0343]], device='cuda:0')
pred: [1 0 0 0 1 0 1 1 1 0 0 1 1 0 0 1 1 1 0 0 0 1 0 1 0 0 0 0 1 1 0 0]
real: 

 48%|████▊     | 25/52 [01:51<02:04,  4.61s/it]

logits:
tensor([[ 0.0232,  0.1643],
        [-0.1779,  0.4862],
        [-0.1560,  0.4755],
        [-0.2782,  0.5957],
        [ 0.2577, -0.0652],
        [ 0.3382, -0.1649],
        [ 0.6622, -0.4276],
        [ 0.6528, -0.5177],
        [-0.1313,  0.4381],
        [ 0.3899, -0.1643],
        [-0.1217,  0.3659],
        [-0.1445,  0.4014],
        [ 0.5119, -0.3052],
        [-0.1001,  0.3729],
        [ 0.1401,  0.0522],
        [-0.4627,  0.7710],
        [ 0.8315, -0.6098],
        [ 0.0464,  0.1607],
        [ 0.4686, -0.2587],
        [-0.2829,  0.5733],
        [ 1.1474, -0.9817],
        [-0.4553,  0.7185],
        [-0.2773,  0.5399],
        [ 0.6852, -0.5470],
        [-0.3285,  0.6202],
        [ 0.4609, -0.2617],
        [-0.3499,  0.6189],
        [-0.3038,  0.5893],
        [ 0.1171,  0.0854],
        [ 0.6878, -0.5015],
        [ 0.2401, -0.0503],
        [ 0.6164, -0.3807]], device='cuda:0')
pred: [1 1 1 1 0 0 0 0 1 0 1 1 0 1 0 1 0 1 0 1 0 1 1 0 1 0 1 1 0 0 0 0]
real: 

 50%|█████     | 26/52 [01:56<01:58,  4.58s/it]

logits:
tensor([[-0.1148,  0.3587],
        [ 0.2167,  0.0363],
        [-0.3094,  0.5660],
        [ 0.4319, -0.2176],
        [-0.4027,  0.6680],
        [ 0.3298, -0.0931],
        [ 0.8413, -0.7168],
        [-0.1302,  0.3760],
        [ 0.0319,  0.2016],
        [ 0.9239, -0.7389],
        [-0.0182,  0.3125],
        [ 0.7840, -0.5957],
        [ 1.0570, -0.9199],
        [ 0.7979, -0.6320],
        [ 0.6010, -0.4499],
        [ 1.2995, -1.1915],
        [ 0.2015, -0.0044],
        [-0.1083,  0.3976],
        [ 0.2607, -0.0495],
        [-0.2153,  0.5096],
        [-0.3149,  0.6236],
        [-0.2017,  0.4252],
        [ 0.1633,  0.0895],
        [ 0.1702,  0.1452],
        [ 0.2738, -0.0275],
        [ 0.8065, -0.6568],
        [-0.4573,  0.7340],
        [ 0.4284, -0.2041],
        [-0.0676,  0.3420],
        [ 0.9620, -0.7969],
        [-0.0273,  0.3429],
        [-0.0778,  0.3401]], device='cuda:0')
pred: [1 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 1 0 1 0 1 1]
real: 

 52%|█████▏    | 27/52 [02:00<01:53,  4.53s/it]

logits:
tensor([[ 0.7892, -0.5984],
        [ 0.6715, -0.4765],
        [-0.2302,  0.5342],
        [ 0.1934,  0.0464],
        [ 0.8425, -0.6929],
        [-0.1399,  0.3864],
        [-0.2829,  0.5819],
        [ 0.8133, -0.5950],
        [-0.2668,  0.5528],
        [ 0.6563, -0.4259],
        [ 0.3138, -0.1489],
        [-0.4757,  0.7968],
        [ 0.3298, -0.1344],
        [ 0.0113,  0.1673],
        [ 0.0327,  0.2347],
        [-0.2766,  0.5852],
        [ 0.9833, -0.8292],
        [ 0.4679, -0.2396],
        [-0.3841,  0.6694],
        [-0.4326,  0.7309],
        [ 1.2326, -1.1263],
        [-0.1845,  0.4723],
        [ 1.0904, -0.9887],
        [ 0.1051,  0.1709],
        [ 0.0318,  0.1950],
        [ 0.3352, -0.0734],
        [ 1.0711, -0.9548],
        [-0.3640,  0.6703],
        [-0.0201,  0.2852],
        [ 0.4635, -0.2482],
        [ 0.0767,  0.1308],
        [ 0.0794,  0.2412]], device='cuda:0')
pred: [0 0 1 0 0 1 1 0 1 0 0 1 0 1 1 1 0 0 1 1 0 1 0 1 1 0 0 1 1 0 1 1]
real: 

 54%|█████▍    | 28/52 [02:05<01:47,  4.50s/it]

logits:
tensor([[-0.1827,  0.4743],
        [ 0.7031, -0.5468],
        [ 0.3713, -0.1661],
        [ 1.1672, -1.0079],
        [-0.2579,  0.5915],
        [ 0.9812, -0.8620],
        [-0.0640,  0.3677],
        [-0.1046,  0.3820],
        [ 0.1013,  0.1552],
        [ 0.4042, -0.1891],
        [ 0.2709, -0.0685],
        [ 0.2002,  0.0301],
        [ 0.0798,  0.1980],
        [ 0.3386, -0.1577],
        [-0.2738,  0.5444],
        [ 0.3771, -0.1129],
        [ 0.8957, -0.7630],
        [-0.1400,  0.3915],
        [-0.1044,  0.3993],
        [ 0.7451, -0.6342],
        [ 0.1619,  0.0814],
        [-0.3021,  0.6088],
        [-0.1718,  0.4737],
        [ 0.5619, -0.4404],
        [ 0.8159, -0.7277],
        [ 0.4875, -0.2604],
        [ 0.4093, -0.1764],
        [ 0.9490, -0.7486],
        [ 0.6364, -0.4788],
        [-0.1263,  0.4299],
        [-0.3787,  0.6681],
        [ 0.8258, -0.6048]], device='cuda:0')
pred: [1 0 0 0 1 0 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 0 0 1 1 0]
real: 

 56%|█████▌    | 29/52 [02:09<01:43,  4.48s/it]

logits:
tensor([[ 0.2654, -0.0398],
        [ 0.8522, -0.7100],
        [ 0.7551, -0.6230],
        [-0.2446,  0.5719],
        [-0.0487,  0.3117],
        [ 0.8352, -0.7472],
        [ 0.3318, -0.1457],
        [ 0.7955, -0.6935],
        [ 0.6704, -0.5757],
        [-0.2954,  0.6286],
        [-0.0204,  0.3417],
        [ 0.1725,  0.1289],
        [ 0.4741, -0.3917],
        [-0.3091,  0.5795],
        [ 0.6592, -0.4565],
        [ 0.3170, -0.0471],
        [-0.1822,  0.5065],
        [ 0.5961, -0.4090],
        [-0.4563,  0.6998],
        [-0.2240,  0.5071],
        [ 0.1532,  0.1044],
        [-0.1706,  0.4483],
        [ 0.3479, -0.1064],
        [ 0.5403, -0.3260],
        [ 0.1442,  0.0798],
        [-0.0104,  0.3547],
        [ 0.1779,  0.0474],
        [ 0.9259, -0.8045],
        [-0.3235,  0.6325],
        [-0.3451,  0.6094],
        [ 0.0462,  0.2172],
        [ 1.0343, -0.8306]], device='cuda:0')
pred: [0 0 0 1 1 0 0 0 0 1 1 0 0 1 0 0 1 0 1 1 0 1 0 0 0 1 0 0 1 1 1 0]
real: 

 58%|█████▊    | 30/52 [02:14<01:38,  4.47s/it]

logits:
tensor([[-4.0116e-01,  7.4830e-01],
        [-4.0389e-01,  6.6707e-01],
        [ 2.3379e-01, -3.5394e-02],
        [ 2.0355e-01,  1.0143e-02],
        [ 2.5287e-01, -3.9457e-04],
        [ 8.2662e-01, -6.9278e-01],
        [ 7.1381e-01, -5.5270e-01],
        [-1.9583e-01,  5.0114e-01],
        [-2.8292e-01,  5.7771e-01],
        [ 5.8709e-01, -3.3490e-01],
        [-1.1135e-01,  3.7079e-01],
        [ 5.9963e-01, -4.5828e-01],
        [-1.3904e-01,  4.2987e-01],
        [-3.5573e-01,  6.2058e-01],
        [-2.3086e-01,  4.8349e-01],
        [-6.9357e-02,  3.4189e-01],
        [ 1.0115e+00, -8.1211e-01],
        [ 4.7181e-01, -2.8775e-01],
        [ 4.4311e-01, -2.0531e-01],
        [ 4.2128e-01, -2.5379e-01],
        [ 8.6808e-01, -7.2562e-01],
        [-1.4085e-01,  4.4800e-01],
        [ 5.2607e-01, -3.5003e-01],
        [-2.4191e-01,  5.3838e-01],
        [ 7.3133e-01, -5.1776e-01],
        [ 3.8335e-01, -1.5138e-01],
        [-3.0832e-01,  5.9376e-01],
        [ 7.4927e-01

 60%|█████▉    | 31/52 [02:18<01:33,  4.45s/it]

logits:
tensor([[ 1.0608, -0.9638],
        [ 0.1699,  0.0057],
        [ 0.1562,  0.1224],
        [ 0.1376,  0.0909],
        [ 0.4579, -0.2764],
        [ 0.4541, -0.2738],
        [ 0.4217, -0.1985],
        [-0.0356,  0.2655],
        [ 0.6328, -0.4661],
        [-0.1692,  0.4454],
        [ 0.6177, -0.4320],
        [ 0.9552, -0.7877],
        [-0.2458,  0.5131],
        [ 0.7268, -0.5006],
        [ 1.0855, -1.0235],
        [ 0.4118, -0.1805],
        [ 0.2717, -0.0432],
        [-0.2655,  0.5495],
        [-0.0916,  0.3593],
        [ 0.6047, -0.4656],
        [-0.2808,  0.5382],
        [-0.0998,  0.4101],
        [ 0.6816, -0.4284],
        [ 1.1424, -0.9238],
        [-0.2610,  0.5293],
        [-0.0667,  0.4015],
        [ 0.1460,  0.1031],
        [ 0.0790,  0.1446],
        [-0.2011,  0.5123],
        [ 0.5456, -0.3346],
        [ 0.5963, -0.4121],
        [-0.2638,  0.5619]], device='cuda:0')
pred: [0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 1 0 0 1 1 0 1 1 0 0 1]
real: 

 62%|██████▏   | 32/52 [02:23<01:29,  4.46s/it]

logits:
tensor([[ 0.9732, -0.8026],
        [-0.2128,  0.4957],
        [ 0.1237,  0.1480],
        [ 0.2626, -0.0216],
        [-0.1272,  0.3864],
        [ 0.6758, -0.5144],
        [ 0.4265, -0.2090],
        [ 0.0487,  0.2408],
        [ 0.8258, -0.6907],
        [ 0.6681, -0.5153],
        [-0.2405,  0.6004],
        [ 0.7727, -0.5411],
        [-0.2538,  0.5253],
        [ 0.8040, -0.5784],
        [ 0.7930, -0.6350],
        [ 0.1771,  0.0691],
        [ 0.0079,  0.2685],
        [-0.2963,  0.5811],
        [-0.1483,  0.4647],
        [ 0.4597, -0.2592],
        [-0.0061,  0.3143],
        [ 0.2137, -0.0198],
        [ 0.1123,  0.1374],
        [ 0.6502, -0.4882],
        [ 0.6919, -0.5723],
        [ 0.2949, -0.0602],
        [-0.1888,  0.4816],
        [ 0.1771,  0.0590],
        [ 0.6583, -0.4908],
        [ 0.7068, -0.5835],
        [-0.1825,  0.4933],
        [-0.1922,  0.4123]], device='cuda:0')
pred: [0 1 1 0 1 0 0 1 0 0 1 0 1 0 0 0 1 1 1 0 1 0 1 0 0 0 1 0 0 0 1 1]
real: 

 63%|██████▎   | 33/52 [02:27<01:24,  4.44s/it]

logits:
tensor([[ 0.8554, -0.7229],
        [-0.1536,  0.4312],
        [ 0.3611, -0.1874],
        [ 0.6051, -0.3993],
        [ 0.4907, -0.2550],
        [ 0.5962, -0.4240],
        [ 0.6368, -0.4152],
        [-0.2588,  0.5818],
        [ 0.5524, -0.4023],
        [-0.4000,  0.6620],
        [-0.3750,  0.6467],
        [ 0.6187, -0.5215],
        [ 1.2728, -1.1458],
        [ 0.7722, -0.6322],
        [ 0.3690, -0.1425],
        [ 0.0305,  0.2392],
        [ 0.5962, -0.4414],
        [ 0.7153, -0.5496],
        [-0.1794,  0.4683],
        [-0.1638,  0.4288],
        [-0.3138,  0.6033],
        [-0.3060,  0.5852],
        [ 0.2633, -0.0725],
        [ 0.7934, -0.5877],
        [-0.2255,  0.5418],
        [-0.1230,  0.3823],
        [ 0.3863, -0.1819],
        [ 0.9967, -0.8452],
        [ 0.5566, -0.3567],
        [ 0.2428, -0.0613],
        [ 0.5905, -0.3490],
        [-0.0913,  0.3402]], device='cuda:0')
pred: [0 1 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 1 1 1 1 0 0 1 1 0 0 0 0 0 1]
real: 

 65%|██████▌   | 34/52 [02:31<01:20,  4.45s/it]

logits:
tensor([[ 0.0123,  0.2621],
        [-0.3512,  0.6544],
        [-0.3420,  0.6612],
        [-0.3082,  0.5600],
        [ 1.0595, -0.8981],
        [-0.0480,  0.3781],
        [ 0.1341,  0.0673],
        [-0.2254,  0.5280],
        [ 0.4936, -0.3399],
        [ 0.1114,  0.1293],
        [ 0.7724, -0.5961],
        [ 0.3247, -0.0913],
        [ 0.0541,  0.2099],
        [ 0.8822, -0.7065],
        [ 0.1800,  0.0815],
        [-0.0050,  0.2307],
        [-0.0433,  0.3292],
        [ 0.5784, -0.3791],
        [ 1.0678, -0.9105],
        [-0.0466,  0.2897],
        [ 1.0049, -0.8652],
        [ 0.9520, -0.7587],
        [-0.3205,  0.6080],
        [ 0.7849, -0.5511],
        [ 0.4891, -0.2958],
        [-0.2953,  0.5944],
        [-0.3828,  0.6651],
        [-0.1928,  0.4485],
        [ 0.1861,  0.0470],
        [-0.2902,  0.5987],
        [ 0.6370, -0.4680],
        [ 0.3178, -0.0814]], device='cuda:0')
pred: [1 1 1 1 0 1 0 1 0 1 0 0 1 0 0 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 0 0]
real: 

 67%|██████▋   | 35/52 [02:36<01:15,  4.45s/it]

logits:
tensor([[ 0.4945, -0.2737],
        [-0.1065,  0.4063],
        [-0.1894,  0.5238],
        [ 0.9989, -0.8484],
        [ 0.0650,  0.1929],
        [-0.1447,  0.4594],
        [ 0.6893, -0.5685],
        [ 0.2286,  0.0089],
        [ 0.1047,  0.1774],
        [-0.1198,  0.3629],
        [ 0.9244, -0.7414],
        [ 0.1255,  0.1252],
        [-0.2812,  0.5810],
        [ 0.7776, -0.5986],
        [ 0.1158,  0.1362],
        [-0.0766,  0.3348],
        [ 0.8201, -0.6181],
        [-0.0779,  0.3918],
        [ 0.6308, -0.4101],
        [-0.2235,  0.4928],
        [ 0.7503, -0.5891],
        [ 0.5411, -0.3298],
        [-0.0093,  0.2310],
        [-0.3275,  0.6354],
        [-0.3245,  0.6241],
        [ 0.9826, -0.7606],
        [-0.0353,  0.3139],
        [-0.2590,  0.5370],
        [ 0.2046,  0.0309],
        [ 0.7235, -0.5013],
        [ 0.2337, -0.0086],
        [ 0.7734, -0.5578]], device='cuda:0')
pred: [0 1 1 0 1 1 0 0 1 1 0 0 1 0 1 1 0 1 0 1 0 0 1 1 1 0 1 1 0 0 0 0]
real: 

 69%|██████▉   | 36/52 [02:40<01:11,  4.44s/it]

logits:
tensor([[-0.2003,  0.4760],
        [-0.4016,  0.6742],
        [ 0.0031,  0.2916],
        [-0.0791,  0.3906],
        [-0.2434,  0.5338],
        [ 0.2864, -0.0777],
        [ 0.3761, -0.1965],
        [ 0.4546, -0.2457],
        [ 1.1826, -1.0426],
        [ 0.2281, -0.0156],
        [-0.0576,  0.3309],
        [-0.0178,  0.2483],
        [ 0.2709, -0.0342],
        [ 0.4781, -0.3003],
        [-0.1211,  0.4555],
        [-0.2060,  0.4810],
        [ 0.7202, -0.5352],
        [ 0.8313, -0.6443],
        [ 0.3418, -0.0934],
        [-0.1327,  0.4377],
        [-0.0036,  0.2491],
        [ 0.8114, -0.6374],
        [-0.2243,  0.5294],
        [ 0.7902, -0.6086],
        [ 0.8941, -0.6665],
        [ 0.8716, -0.7780],
        [ 0.2252, -0.0578],
        [ 0.1818,  0.0835],
        [ 0.6822, -0.5131],
        [ 0.3806, -0.2078],
        [-0.3704,  0.6585],
        [ 0.2216,  0.0067]], device='cuda:0')
pred: [1 1 1 1 1 0 0 0 0 0 1 1 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 1 0]
real: 

 71%|███████   | 37/52 [02:45<01:06,  4.45s/it]

logits:
tensor([[-0.1224,  0.4442],
        [ 0.0288,  0.2184],
        [ 0.5973, -0.4072],
        [ 0.8351, -0.6408],
        [ 0.6006, -0.4143],
        [ 0.1823,  0.1292],
        [ 0.1061,  0.1753],
        [-0.0739,  0.3554],
        [-0.2592,  0.5048],
        [ 1.0524, -0.9861],
        [ 0.7624, -0.6126],
        [ 1.0946, -0.8866],
        [ 1.1564, -1.0018],
        [ 0.5728, -0.4577],
        [ 0.3723, -0.1617],
        [ 0.1383,  0.1234],
        [ 0.0284,  0.1806],
        [ 0.2880, -0.0364],
        [ 0.8560, -0.7820],
        [ 0.1334,  0.0283],
        [-0.1738,  0.5007],
        [ 0.5710, -0.3736],
        [ 0.5157, -0.3042],
        [-0.1473,  0.4793],
        [ 0.3337, -0.0879],
        [ 1.0223, -0.8579],
        [ 1.0573, -0.8423],
        [-0.0837,  0.3150],
        [-0.0740,  0.3517],
        [-0.0324,  0.3510],
        [ 0.3301, -0.0834],
        [ 0.4081, -0.2411]], device='cuda:0')
pred: [1 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 1 1 1 0 0]
real: 

 73%|███████▎  | 38/52 [02:49<01:02,  4.44s/it]

logits:
tensor([[ 0.2523, -0.0183],
        [-0.2098,  0.5411],
        [-0.4309,  0.7353],
        [ 0.4999, -0.3050],
        [ 0.5193, -0.3691],
        [-0.1825,  0.4854],
        [-0.3376,  0.6150],
        [ 0.6400, -0.5245],
        [-0.3004,  0.5744],
        [-0.2095,  0.5195],
        [ 0.0111,  0.1996],
        [-0.2845,  0.5396],
        [ 0.7231, -0.4799],
        [ 0.7998, -0.6613],
        [ 0.9080, -0.6995],
        [ 1.0900, -0.9441],
        [ 0.9720, -0.8895],
        [ 0.9877, -0.8042],
        [ 0.3612, -0.1225],
        [-0.3335,  0.5958],
        [-0.0595,  0.3916],
        [-0.0174,  0.2950],
        [-0.3794,  0.6825],
        [ 0.1411,  0.0588],
        [-0.3319,  0.6052],
        [ 0.1396,  0.0911],
        [-0.1033,  0.3906],
        [-0.0182,  0.2824],
        [-0.1070,  0.4082],
        [-0.1386,  0.3760],
        [-0.2908,  0.6140],
        [ 0.6137, -0.5055]], device='cuda:0')
pred: [0 1 1 0 0 1 1 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 1 0 1 1 1 1 1 0]
real: 

 75%|███████▌  | 39/52 [02:54<00:57,  4.44s/it]

logits:
tensor([[-0.2748,  0.5623],
        [-0.0912,  0.3948],
        [ 0.3554, -0.1853],
        [-0.2098,  0.5169],
        [-0.2236,  0.5129],
        [-0.3961,  0.6648],
        [ 0.2201,  0.0152],
        [-0.4216,  0.6782],
        [-0.0709,  0.3089],
        [ 0.2403,  0.0040],
        [ 0.9079, -0.8083],
        [ 0.0281,  0.2939],
        [ 0.8575, -0.7179],
        [ 0.3015, -0.0349],
        [-0.3452,  0.6067],
        [ 0.3643, -0.2006],
        [ 0.9923, -0.7956],
        [ 0.9944, -0.7530],
        [ 0.2780,  0.0065],
        [ 0.9275, -0.7797],
        [ 0.1030,  0.2614],
        [-0.2549,  0.5720],
        [ 0.0476,  0.2208],
        [ 0.7128, -0.5611],
        [ 0.6185, -0.4794],
        [-0.0502,  0.3751],
        [ 0.7984, -0.5973],
        [-0.1798,  0.4814],
        [ 0.7037, -0.5553],
        [ 0.3839, -0.1556],
        [ 0.4971, -0.3226],
        [ 0.9234, -0.7112]], device='cuda:0')
pred: [1 1 0 1 1 1 0 1 1 0 0 1 0 0 1 0 0 0 0 0 1 1 1 0 0 1 0 1 0 0 0 0]
real: 

 77%|███████▋  | 40/52 [02:58<00:53,  4.44s/it]

logits:
tensor([[-0.4103,  0.6936],
        [-0.0505,  0.3379],
        [-0.0552,  0.2954],
        [ 0.2071,  0.0143],
        [-0.3175,  0.5590],
        [ 0.5764, -0.3990],
        [-0.3947,  0.7172],
        [-0.0312,  0.3015],
        [ 0.6530, -0.4819],
        [ 0.1760,  0.0307],
        [ 1.3012, -1.2237],
        [-0.2467,  0.6070],
        [ 0.3215, -0.1143],
        [-0.1107,  0.4324],
        [ 0.1848,  0.0533],
        [-0.0067,  0.2649],
        [ 0.2489,  0.0354],
        [-0.2498,  0.5663],
        [-0.2404,  0.5149],
        [ 1.3954, -1.2416],
        [-0.3407,  0.6027],
        [ 0.8945, -0.6636],
        [-0.2943,  0.5771],
        [-0.2716,  0.5591],
        [-0.0143,  0.2724],
        [ 1.0537, -0.9250],
        [-0.3155,  0.6089],
        [ 0.2441, -0.0582],
        [ 0.0141,  0.2573],
        [-0.3772,  0.6820],
        [ 0.8161, -0.6225],
        [ 1.1151, -1.0298]], device='cuda:0')
pred: [1 1 1 0 1 0 1 1 0 0 0 1 0 1 0 1 0 1 1 0 1 0 1 1 1 0 1 0 1 1 0 0]
real: 

 79%|███████▉  | 41/52 [03:02<00:48,  4.43s/it]

logits:
tensor([[-0.2461,  0.5878],
        [ 0.7929, -0.5703],
        [ 0.5175, -0.3193],
        [-0.0558,  0.3170],
        [ 0.3442, -0.1775],
        [ 0.6236, -0.4555],
        [ 0.7380, -0.5822],
        [-0.0719,  0.3353],
        [-0.3655,  0.6272],
        [ 0.4569, -0.2998],
        [-0.0562,  0.3417],
        [-0.4234,  0.7140],
        [-0.2287,  0.4980],
        [-0.1204,  0.3927],
        [-0.2486,  0.5935],
        [ 1.0801, -0.9314],
        [ 0.1864,  0.0027],
        [-0.1513,  0.4465],
        [-0.0931,  0.4006],
        [ 1.0567, -0.8961],
        [-0.1702,  0.5172],
        [ 0.0221,  0.2257],
        [ 0.5276, -0.3276],
        [ 0.5661, -0.3737],
        [ 0.3170, -0.0820],
        [ 0.0778,  0.1670],
        [-0.3042,  0.5771],
        [ 0.3633, -0.1501],
        [-0.3176,  0.5873],
        [-0.0835,  0.3730],
        [ 1.1951, -1.1287],
        [ 0.5094, -0.3539]], device='cuda:0')
pred: [1 0 0 1 0 0 0 1 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 0 1 1 0 1 1 0 0]
real: 

 81%|████████  | 42/52 [03:07<00:44,  4.43s/it]

logits:
tensor([[ 0.7994, -0.6084],
        [-0.2296,  0.5279],
        [-0.3903,  0.6849],
        [-0.0677,  0.3113],
        [-0.3183,  0.6026],
        [ 0.9609, -0.8797],
        [-0.2529,  0.5573],
        [-0.0576,  0.3079],
        [ 0.1864,  0.0744],
        [-0.3924,  0.6356],
        [-0.2065,  0.5181],
        [ 0.0261,  0.2297],
        [ 0.7369, -0.4965],
        [ 1.0131, -0.8400],
        [ 0.0726,  0.2465],
        [ 0.0123,  0.2568],
        [ 0.1340,  0.1655],
        [ 0.8924, -0.7884],
        [ 0.0455,  0.1771],
        [ 0.2163,  0.0325],
        [-0.2642,  0.5281],
        [ 0.8254, -0.6769],
        [ 1.1479, -0.9986],
        [-0.1251,  0.4112],
        [ 0.2055,  0.0107],
        [ 0.8550, -0.6842],
        [ 1.1232, -0.9569],
        [ 0.6762, -0.5357],
        [-0.0864,  0.3293],
        [ 0.8677, -0.6951],
        [-0.3186,  0.6264],
        [ 0.3659, -0.1621]], device='cuda:0')
pred: [0 1 1 1 1 0 1 1 0 1 1 1 0 0 1 1 1 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0]
real: 

 83%|████████▎ | 43/52 [03:11<00:39,  4.44s/it]

logits:
tensor([[ 0.4041, -0.1515],
        [ 0.3443, -0.1511],
        [-0.1953,  0.5101],
        [-0.1547,  0.4463],
        [ 0.9487, -0.7401],
        [ 0.1575,  0.1039],
        [-0.3896,  0.6482],
        [ 0.6610, -0.5356],
        [ 0.2868, -0.0584],
        [ 0.8517, -0.6142],
        [ 0.8693, -0.7707],
        [-0.1986,  0.5062],
        [-0.2751,  0.6115],
        [ 0.5837, -0.3350],
        [ 0.6288, -0.5327],
        [ 0.6916, -0.5018],
        [ 0.6896, -0.5399],
        [-0.2421,  0.5644],
        [ 0.0691,  0.1934],
        [-0.0390,  0.3089],
        [-0.1483,  0.5008],
        [-0.3906,  0.6779],
        [-0.2873,  0.5640],
        [ 0.6517, -0.4789],
        [ 0.0110,  0.2596],
        [ 0.0654,  0.1867],
        [ 0.9923, -0.7617],
        [ 0.9965, -0.8747],
        [-0.2782,  0.5751],
        [-0.1305,  0.4512],
        [-0.2415,  0.5852],
        [-0.1071,  0.3763]], device='cuda:0')
pred: [0 0 1 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1]
real: 

 85%|████████▍ | 44/52 [03:16<00:35,  4.43s/it]

logits:
tensor([[ 0.1757,  0.1296],
        [-0.0524,  0.3504],
        [-0.0915,  0.3668],
        [ 0.9780, -0.8384],
        [ 0.8902, -0.7525],
        [ 0.2743, -0.1065],
        [-0.0351,  0.2751],
        [ 0.3428, -0.1114],
        [-0.4061,  0.6583],
        [ 0.4357, -0.2096],
        [ 0.6663, -0.4587],
        [-0.2611,  0.5555],
        [-0.3106,  0.5629],
        [ 0.1315,  0.1391],
        [-0.2420,  0.5362],
        [-0.3511,  0.6273],
        [ 0.8386, -0.6239],
        [ 0.0518,  0.1946],
        [-0.3945,  0.6738],
        [-0.0516,  0.3331],
        [ 0.6988, -0.4691],
        [ 1.0643, -0.9589],
        [-0.3320,  0.6297],
        [-0.1042,  0.3886],
        [-0.3152,  0.6247],
        [ 0.3733, -0.1869],
        [ 0.0014,  0.2515],
        [ 0.8701, -0.7088],
        [ 0.9218, -0.7609],
        [ 0.0805,  0.1771],
        [ 0.0126,  0.2196],
        [-0.1010,  0.3563]], device='cuda:0')
pred: [0 1 1 0 0 0 1 0 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 1 0 0 1 1 1]
real: 

 87%|████████▋ | 45/52 [03:20<00:31,  4.44s/it]

logits:
tensor([[ 0.8899, -0.6458],
        [-0.2961,  0.5825],
        [ 0.5646, -0.2839],
        [-0.2225,  0.5308],
        [-0.1896,  0.4212],
        [ 0.5961, -0.3958],
        [-0.1845,  0.4741],
        [-0.1396,  0.3944],
        [-0.2535,  0.5110],
        [ 0.9406, -0.7787],
        [ 0.8398, -0.6557],
        [ 0.4282, -0.1826],
        [-0.0776,  0.3397],
        [ 1.1443, -1.0000],
        [-0.1173,  0.3909],
        [-0.1051,  0.3589],
        [ 1.0258, -0.9041],
        [ 0.0798,  0.1588],
        [ 0.5970, -0.3664],
        [-0.0588,  0.3159],
        [ 0.2996, -0.0444],
        [ 0.7621, -0.6223],
        [ 0.6406, -0.4640],
        [ 0.7433, -0.5748],
        [ 0.3487, -0.1578],
        [ 0.2567, -0.1005],
        [ 1.2204, -1.1336],
        [-0.1236,  0.3989],
        [-0.1963,  0.4542],
        [ 0.9493, -0.7778],
        [ 0.5558, -0.4618],
        [ 0.6389, -0.4413]], device='cuda:0')
pred: [0 1 0 1 1 0 1 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0]
real: 

 88%|████████▊ | 46/52 [03:25<00:26,  4.44s/it]

logits:
tensor([[ 0.8643, -0.6638],
        [ 0.0396,  0.2192],
        [ 0.2557, -0.0554],
        [ 0.0649,  0.2155],
        [-0.2521,  0.5395],
        [-0.0672,  0.3629],
        [ 0.9339, -0.7363],
        [ 0.0686,  0.1920],
        [-0.0593,  0.3421],
        [ 1.0646, -0.9493],
        [ 1.2553, -1.1230],
        [ 0.0988,  0.0659],
        [-0.3054,  0.5561],
        [ 1.0042, -0.8289],
        [-0.2283,  0.4878],
        [ 0.2963, -0.0563],
        [-0.3104,  0.5152],
        [-0.1660,  0.4835],
        [ 0.8761, -0.7150],
        [ 0.8741, -0.6769],
        [ 0.7194, -0.5683],
        [ 0.3717, -0.1416],
        [-0.4446,  0.7311],
        [ 0.2118, -0.0240],
        [-0.3085,  0.6305],
        [ 0.1911,  0.0418],
        [ 0.2565, -0.0385],
        [ 0.9362, -0.8041],
        [-0.2934,  0.5914],
        [ 0.5037, -0.3159],
        [ 1.0614, -0.9441],
        [ 0.1861,  0.0576]], device='cuda:0')
pred: [0 1 0 1 1 1 0 1 1 0 0 0 1 0 1 0 1 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0]
real: 

 90%|█████████ | 47/52 [03:29<00:22,  4.43s/it]

logits:
tensor([[ 0.5276, -0.3825],
        [ 0.8949, -0.7211],
        [ 0.2061,  0.0570],
        [ 0.2862, -0.0223],
        [-0.2879,  0.6328],
        [ 0.6092, -0.4353],
        [ 0.7405, -0.5658],
        [-0.1653,  0.4438],
        [ 0.0453,  0.1730],
        [ 0.3524, -0.1484],
        [ 0.2780, -0.0970],
        [-0.0945,  0.3401],
        [ 0.0072,  0.2729],
        [ 0.5679, -0.3793],
        [ 0.9032, -0.7719],
        [ 0.9663, -0.7843],
        [-0.2670,  0.5349],
        [-0.2328,  0.5553],
        [-0.0817,  0.4026],
        [ 0.8693, -0.7111],
        [-0.2313,  0.5225],
        [-0.3087,  0.5955],
        [ 0.6393, -0.4241],
        [-0.2814,  0.5459],
        [ 0.3312, -0.1421],
        [ 0.5217, -0.3471],
        [ 0.9358, -0.7515],
        [ 0.6529, -0.5092],
        [ 0.3121, -0.1670],
        [ 0.9842, -0.7567],
        [ 0.4087, -0.2199],
        [ 0.1087,  0.1022]], device='cuda:0')
pred: [0 0 0 0 1 0 0 1 1 0 0 1 1 0 0 0 1 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0]
real: 

 92%|█████████▏| 48/52 [03:34<00:17,  4.44s/it]

logits:
tensor([[-0.3633,  0.6434],
        [ 0.5984, -0.3800],
        [-0.0075,  0.2737],
        [-0.2343,  0.5151],
        [ 0.1402,  0.1315],
        [ 0.2366, -0.0029],
        [ 0.2250,  0.0029],
        [ 1.1107, -1.0055],
        [-0.3788,  0.6367],
        [ 0.3287, -0.1633],
        [ 1.3419, -1.1906],
        [ 0.0628,  0.1757],
        [ 0.7642, -0.6588],
        [ 0.8457, -0.6780],
        [ 0.1831,  0.0303],
        [ 0.0769,  0.1830],
        [ 0.0405,  0.2179],
        [ 0.8787, -0.7500],
        [ 0.3189, -0.1251],
        [-0.3319,  0.6065],
        [ 0.0120,  0.2856],
        [-0.1525,  0.4215],
        [ 0.4508, -0.1859],
        [-0.2076,  0.5021],
        [-0.3772,  0.6773],
        [-0.1392,  0.4262],
        [ 0.6966, -0.4487],
        [ 0.9142, -0.7963],
        [ 0.1940,  0.0092],
        [-0.1865,  0.4598],
        [ 0.8848, -0.7188],
        [-0.3433,  0.6743]], device='cuda:0')
pred: [1 0 1 1 0 0 0 0 1 0 0 1 0 0 0 1 1 0 0 1 1 1 0 1 1 1 0 0 0 1 0 1]
real: 

 94%|█████████▍| 49/52 [03:38<00:13,  4.44s/it]

logits:
tensor([[ 0.2812, -0.0712],
        [ 1.0114, -0.7732],
        [-0.4559,  0.7490],
        [ 1.0800, -0.9917],
        [ 0.9361, -0.8575],
        [ 1.0125, -0.8526],
        [-0.0576,  0.3200],
        [-0.0290,  0.3096],
        [-0.2841,  0.5393],
        [ 0.8007, -0.5516],
        [-0.1799,  0.4263],
        [ 0.2177,  0.0182],
        [-0.1125,  0.3630],
        [-0.2083,  0.4917],
        [ 0.3188, -0.1311],
        [ 0.1823,  0.0826],
        [-0.2133,  0.5027],
        [-0.3265,  0.6276],
        [ 0.8062, -0.6003],
        [-0.1558,  0.3919],
        [ 0.9399, -0.8124],
        [ 0.8779, -0.6967],
        [-0.1262,  0.3995],
        [-0.3523,  0.6219],
        [ 0.1907,  0.0547],
        [ 0.0169,  0.2686],
        [-0.3208,  0.6208],
        [-0.3819,  0.7009],
        [-0.3634,  0.6215],
        [ 0.7430, -0.5206],
        [ 0.2962, -0.0945],
        [ 0.4211, -0.1932]], device='cuda:0')
pred: [0 0 1 0 0 0 1 1 1 0 1 0 1 1 0 0 1 1 0 1 0 0 1 1 0 1 1 1 1 0 0 0]
real: 

 96%|█████████▌| 50/52 [03:42<00:08,  4.43s/it]

logits:
tensor([[ 0.5119, -0.4025],
        [ 0.0388,  0.2460],
        [ 0.2724, -0.1135],
        [ 0.6878, -0.4754],
        [ 0.7886, -0.5217],
        [ 0.3695, -0.1649],
        [-0.1289,  0.4043],
        [ 0.7252, -0.5739],
        [ 0.0694,  0.2312],
        [ 0.9120, -0.7584],
        [-0.3700,  0.6603],
        [ 0.0120,  0.1746],
        [ 0.5917, -0.3345],
        [-0.2067,  0.5441],
        [-0.2702,  0.5478],
        [-0.1484,  0.4192],
        [ 0.5053, -0.3557],
        [ 1.0503, -0.9825],
        [-0.2803,  0.5297],
        [ 0.8180, -0.6499],
        [ 0.0119,  0.2090],
        [ 0.7241, -0.5984],
        [ 0.4360, -0.2376],
        [ 0.1541,  0.0768],
        [-0.3271,  0.6549],
        [-0.2544,  0.5645],
        [-0.1512,  0.4202],
        [ 0.7371, -0.5952],
        [-0.4426,  0.7288],
        [ 0.7050, -0.5967],
        [ 1.0154, -0.7927],
        [ 0.3264, -0.1314]], device='cuda:0')
pred: [0 1 0 0 0 0 1 0 1 0 1 1 0 1 1 1 0 0 1 0 1 0 0 0 1 1 1 0 1 0 0 0]
real: 

 98%|█████████▊| 51/52 [03:47<00:04,  4.47s/it]

logits:
tensor([[-0.1060,  0.4276],
        [ 0.2000, -0.0119],
        [ 0.5570, -0.4207],
        [-0.2242,  0.5221],
        [-0.0836,  0.3740],
        [-0.2122,  0.4744],
        [ 1.0564, -1.0196],
        [ 0.7444, -0.6304],
        [ 0.5539, -0.4055],
        [ 0.6517, -0.4901],
        [-0.0911,  0.3782],
        [ 0.5328, -0.3625],
        [ 0.3556, -0.2233],
        [-0.2936,  0.6367],
        [ 0.8998, -0.7104],
        [-0.2913,  0.5609],
        [-0.0990,  0.3981],
        [ 1.1645, -1.0131],
        [ 0.7612, -0.5847],
        [ 0.4804, -0.2382],
        [ 0.8777, -0.7947],
        [-0.2117,  0.5059],
        [ 0.3449, -0.0773],
        [ 0.2235,  0.0348],
        [-0.3917,  0.6808],
        [-0.1897,  0.5011],
        [ 1.2086, -1.0549],
        [-0.2604,  0.5550],
        [ 1.1848, -1.0110],
        [ 0.6442, -0.4583],
        [-0.3316,  0.6187],
        [ 0.2443, -0.0268]], device='cuda:0')
pred: [1 0 0 1 1 1 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0]
real: 

100%|██████████| 52/52 [03:49<00:00,  4.41s/it]

logits:
tensor([[-0.2203,  0.5152],
        [-0.3949,  0.6615],
        [ 0.8530, -0.6013],
        [ 0.6808, -0.4814],
        [ 0.8936, -0.6998],
        [ 0.0837,  0.1973],
        [ 0.2397, -0.0516],
        [-0.3773,  0.6604],
        [ 0.9102, -0.6717],
        [-0.2235,  0.5463],
        [-0.2756,  0.5374],
        [-0.0116,  0.2670],
        [ 0.6899, -0.5265],
        [-0.2293,  0.5114],
        [ 0.6151, -0.4156]], device='cuda:0')
pred: [1 1 0 0 0 1 0 1 0 1 1 1 0 1 0]
real: [0 1 0 0 0 1 0 0 0 0 0 0 0 1 0]

	test loss: nan

	test acc: 0.6131810897435898

	test prec: 0.8698094102077625

	test rec: 0.6131810897435898

	test f1: 0.6790997402485328



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [27]:
print("pred_flat:", "\n", pred_flat)
print("\n")
print("logits", "\n", logits)

pred_flat: 
 [1 1 0 0 0 1 0 1 0 1 1 1 0 1 0]


logits 
 [[-0.2203012   0.51520926]
 [-0.39488778  0.6615116 ]
 [ 0.85299224 -0.6012564 ]
 [ 0.68075854 -0.48136777]
 [ 0.8935856  -0.69978046]
 [ 0.08367971  0.19733039]
 [ 0.23965524 -0.05155321]
 [-0.37729228  0.6603529 ]
 [ 0.9102003  -0.6717322 ]
 [-0.22349375  0.5463257 ]
 [-0.27564353  0.5373843 ]
 [-0.01159942  0.2670028 ]
 [ 0.6898548  -0.52651393]
 [-0.22931378  0.51137674]
 [ 0.61511284 -0.4155641 ]]


### printig some variabels to look into 

In [28]:
train_loss


[0.5875257253646851,
 0.6241099238395691,
 0.5964183807373047,
 0.4867846369743347,
 0.5664580464363098,
 0.6384475827217102,
 0.5234268307685852,
 0.611117422580719,
 0.5452214479446411,
 0.634705126285553,
 0.602626621723175,
 0.5830845832824707,
 0.5167552828788757,
 0.6032499074935913,
 0.6057895421981812,
 0.5802444815635681,
 0.6778051853179932,
 0.5698618292808533,
 0.6553929448127747,
 0.5920110940933228,
 0.5842699408531189,
 0.7046537399291992,
 0.617445170879364,
 0.5529775023460388,
 0.5285909175872803,
 0.6599240303039551,
 0.4906185269355774,
 0.5913090109825134,
 0.5554555654525757,
 0.6124575138092041,
 0.5329173803329468,
 0.6130095720291138,
 0.6332884430885315,
 0.5746018886566162,
 0.47806593775749207,
 0.70933997631073,
 0.5713217258453369,
 0.47686660289764404,
 0.7801812887191772,
 0.658084511756897,
 0.5817245244979858,
 0.6217321753501892,
 0.6044613718986511,
 0.6423258185386658,
 0.6924811601638794,
 0.6500186920166016,
 0.49954748153686523,
 0.63042032718658

In [29]:
train_acc 

[0.609375,
 0.625,
 0.65625,
 0.765625,
 0.625,
 0.609375,
 0.640625,
 0.625,
 0.703125,
 0.65625,
 0.765625,
 0.671875,
 0.6875,
 0.609375,
 0.640625,
 0.671875,
 0.609375,
 0.6875,
 0.640625,
 0.65625,
 0.640625,
 0.578125,
 0.625,
 0.65625,
 0.703125,
 0.640625,
 0.6875,
 0.65625,
 0.6875,
 0.71875,
 0.71875,
 0.625,
 0.703125,
 0.703125,
 0.703125,
 0.625,
 0.703125,
 0.734375,
 0.578125,
 0.59375,
 0.65625,
 0.65625,
 0.609375,
 0.640625,
 0.625,
 0.625,
 0.640625,
 0.59375,
 0.65625,
 0.796875,
 0.59375,
 0.640625,
 0.703125,
 0.6875,
 0.53125,
 0.578125,
 0.578125,
 0.609375,
 0.71875,
 0.734375,
 0.625,
 0.65625,
 0.75,
 0.640625,
 0.625,
 0.734375,
 0.6875,
 0.703125,
 0.65625,
 0.609375,
 0.625,
 0.640625,
 0.625,
 0.65625,
 0.59375,
 0.6875,
 0.546875,
 0.75,
 0.609375,
 0.734375,
 0.6875,
 0.609375,
 0.5454545454545454]

In [30]:
train_prec

[0.8091440886699507,
 0.7909919028340081,
 0.8870957051961823,
 0.9184782608695652,
 0.8299595141700405,
 0.825987539766702,
 0.8715992647058823,
 0.8442118226600985,
 0.9141301406926406,
 0.7913141025641026,
 0.8822841183574879,
 0.8886295995670995,
 0.8958333333333334,
 0.8508620689655172,
 0.7947115384615384,
 0.9017628205128205,
 0.7959577429149798,
 0.8546794871794872,
 0.885546875,
 0.9339541357370095,
 0.8082217261904762,
 0.7586649149149151,
 0.8209459459459459,
 0.866826923076923,
 0.924626245847176,
 0.7947115384615384,
 0.9189814814814815,
 0.8765624999999999,
 0.8546794871794872,
 0.9059794372294373,
 0.8722230913642053,
 0.7195945945945946,
 0.8363433441558442,
 0.8233072916666667,
 0.9595170454545454,
 0.8839743589743589,
 0.793359375,
 0.8783653846153846,
 0.7200755567338282,
 0.8692956349206349,
 0.866826923076923,
 0.765625,
 0.9191066066066066,
 0.8647836538461539,
 0.8125,
 0.8958333333333334,
 0.8702256944444444,
 0.8697916666666666,
 0.9004301619433198,
 0.84084821

In [31]:

train_rec

[0.609375,
 0.625,
 0.65625,
 0.765625,
 0.625,
 0.609375,
 0.640625,
 0.625,
 0.703125,
 0.65625,
 0.765625,
 0.671875,
 0.6875,
 0.609375,
 0.640625,
 0.671875,
 0.609375,
 0.6875,
 0.640625,
 0.65625,
 0.640625,
 0.578125,
 0.625,
 0.65625,
 0.703125,
 0.640625,
 0.6875,
 0.65625,
 0.6875,
 0.71875,
 0.71875,
 0.625,
 0.703125,
 0.703125,
 0.703125,
 0.625,
 0.703125,
 0.734375,
 0.578125,
 0.59375,
 0.65625,
 0.65625,
 0.609375,
 0.640625,
 0.625,
 0.625,
 0.640625,
 0.59375,
 0.65625,
 0.796875,
 0.59375,
 0.640625,
 0.703125,
 0.6875,
 0.53125,
 0.578125,
 0.578125,
 0.609375,
 0.71875,
 0.734375,
 0.625,
 0.65625,
 0.75,
 0.640625,
 0.625,
 0.734375,
 0.6875,
 0.703125,
 0.65625,
 0.609375,
 0.625,
 0.640625,
 0.625,
 0.65625,
 0.59375,
 0.6875,
 0.546875,
 0.75,
 0.609375,
 0.734375,
 0.6875,
 0.609375,
 0.5454545454545454]

In [32]:

train_f1

[0.6628313166234514,
 0.6757246376811595,
 0.7358035714285714,
 0.8042068506817426,
 0.6883604505632039,
 0.6903300243817485,
 0.6949816335350043,
 0.6820175438596492,
 0.7716011184451778,
 0.6969353864734299,
 0.8046723300970873,
 0.7475591309130913,
 0.7301136363636364,
 0.6751559251559253,
 0.688652073732719,
 0.7402934818756237,
 0.6685387864823349,
 0.7344336670838548,
 0.7238658133054684,
 0.7547605580693816,
 0.6846279890521463,
 0.6356363231363231,
 0.6820652173913044,
 0.720703125,
 0.7795631067961165,
 0.688652073732719,
 0.7461670838548186,
 0.7278061224489796,
 0.7344336670838548,
 0.7766071428571427,
 0.7795347341337907,
 0.6524086378737542,
 0.7454792983039574,
 0.7375096006144395,
 0.7885558252427185,
 0.7117346938775511,
 0.7293120730620731,
 0.769873271889401,
 0.6323564593301436,
 0.6776439299123904,
 0.720703125,
 0.6911231884057971,
 0.7079730628533422,
 0.7081439393939393,
 0.6763157894736842,
 0.7214285714285713,
 0.672654029966417,
 0.6982142857142857,
 0.7278645

In [33]:
val_accuracy

[]

In [34]:
val_loss

[0.702800452709198,
 0.5250018835067749,
 0.7090986371040344,
 0.6518169641494751,
 0.5457489490509033,
 0.5479598641395569,
 0.532227635383606,
 0.5785199403762817,
 0.761624276638031,
 0.4775333106517792,
 0.5654107928276062,
 0.5686037540435791,
 0.5682851076126099,
 0.5628700256347656,
 0.5394037961959839,
 0.46262815594673157,
 0.4998567998409271,
 0.5535305738449097,
 0.46309375762939453,
 0.4850519001483917,
 0.5246384739875793,
 0.6416539549827576,
 0.7478864192962646,
 0.6019914746284485,
 0.6380285620689392,
 0.54725581407547,
 0.6271235346794128,
 0.5508907437324524,
 0.5519490242004395,
 0.6776984930038452,
 0.5541132092475891,
 0.6372450590133667,
 0.664983332157135,
 0.5127569437026978,
 0.5362096428871155,
 0.8163884878158569,
 0.5023529529571533,
 0.537419855594635,
 0.48321136832237244,
 0.5684155821800232,
 0.5723525881767273,
 0.5718719959259033]

In [35]:
val_acc

[0.46875,
 0.71875,
 0.53125,
 0.5,
 0.6875,
 0.625,
 0.625,
 0.53125,
 0.46875,
 0.71875,
 0.65625,
 0.65625,
 0.65625,
 0.625,
 0.65625,
 0.75,
 0.6875,
 0.59375,
 0.6875,
 0.65625,
 0.6875,
 0.59375,
 0.59375,
 0.5625,
 0.5,
 0.65625,
 0.65625,
 0.65625,
 0.65625,
 0.625,
 0.625,
 0.5625,
 0.53125,
 0.71875,
 0.71875,
 0.5625,
 0.71875,
 0.625,
 0.65625,
 0.6875,
 0.625,
 0.3333333333333333]

In [36]:
val_prec

[1.0,
 0.8838068181818183,
 0.7887254901960784,
 0.8568627450980392,
 0.9479166666666666,
 0.8729757085020242,
 0.9464285714285714,
 0.9013157894736842,
 0.798828125,
 0.9134615384615384,
 0.8765624999999999,
 0.7984375,
 0.8272058823529411,
 0.8531746031746031,
 0.9471153846153846,
 0.9166666666666666,
 0.8958333333333334,
 0.689453125,
 0.9278846153846154,
 0.9263392857142857,
 0.8624999999999999,
 0.7669117647058824,
 0.7834821428571429,
 0.84375,
 0.8333333333333334,
 0.8272058823529411,
 0.8765624999999999,
 0.898538961038961,
 0.9713541666666666,
 0.6953125,
 0.925,
 0.8242647058823529,
 0.861328125,
 0.8995535714285714,
 0.8674242424242423,
 0.6972402597402597,
 0.9488636363636364,
 0.9464285714285714,
 0.9263392857142857,
 0.7491883116883117,
 0.7909919028340081,
 0.1111111111111111]

In [37]:
val_rec

[0.46875,
 0.71875,
 0.53125,
 0.5,
 0.6875,
 0.625,
 0.625,
 0.53125,
 0.46875,
 0.71875,
 0.65625,
 0.65625,
 0.65625,
 0.625,
 0.65625,
 0.75,
 0.6875,
 0.59375,
 0.6875,
 0.65625,
 0.6875,
 0.59375,
 0.59375,
 0.5625,
 0.5,
 0.65625,
 0.65625,
 0.65625,
 0.65625,
 0.625,
 0.625,
 0.5625,
 0.53125,
 0.71875,
 0.71875,
 0.5625,
 0.71875,
 0.625,
 0.65625,
 0.6875,
 0.625,
 0.3333333333333333]

In [38]:
val_f1

[0.6382978723404256,
 0.7751696832579186,
 0.6096491228070176,
 0.5954545454545455,
 0.7678571428571429,
 0.703125,
 0.7187499999999999,
 0.598356309650053,
 0.5737573099415204,
 0.7662703379224031,
 0.7278061224489796,
 0.7014236545682103,
 0.693936877076412,
 0.6884057971014493,
 0.7437074829931973,
 0.7916666666666666,
 0.7301136363636364,
 0.6141666666666667,
 0.7526041666666666,
 0.7272371714643305,
 0.7395833333333334,
 0.6382890365448505,
 0.6493421052631578,
 0.634090909090909,
 0.5757575757575758,
 0.693936877076412,
 0.7278061224489796,
 0.7449095022624433,
 0.7646116138763197,
 0.6386639676113361,
 0.7010869565217391,
 0.6193181818181819,
 0.6239035087719299,
 0.7572368421052632,
 0.7642857142857143,
 0.6171875,
 0.791289592760181,
 0.7187499999999999,
 0.7272371714643305,
 0.7086352657004831,
 0.6757246376811595,
 0.16666666666666666]

In [39]:
# test_accuracy

In [40]:
test_loss

[]

In [41]:
test_acc

[0.625,
 0.71875,
 0.65625,
 0.59375,
 0.78125,
 0.625,
 0.5625,
 0.5625,
 0.625,
 0.34375,
 0.53125,
 0.71875,
 0.71875,
 0.5,
 0.625,
 0.65625,
 0.46875,
 0.5625,
 0.59375,
 0.53125,
 0.5,
 0.59375,
 0.6875,
 0.59375,
 0.625,
 0.5625,
 0.5625,
 0.65625,
 0.59375,
 0.71875,
 0.71875,
 0.625,
 0.71875,
 0.5,
 0.625,
 0.6875,
 0.71875,
 0.5625,
 0.65625,
 0.6875,
 0.5,
 0.5625,
 0.5625,
 0.53125,
 0.59375,
 0.59375,
 0.71875,
 0.71875,
 0.5625,
 0.625,
 0.65625,
 0.6666666666666666]

In [42]:
test_prec

[0.925,
 0.8995535714285714,
 0.9263392857142857,
 0.8485294117647059,
 0.8251811594202898,
 0.7909919028340081,
 0.9708333333333333,
 0.84375,
 0.8618421052631579,
 0.7598958333333333,
 0.970703125,
 0.8838068181818183,
 0.9488636363636364,
 0.8809523809523809,
 0.8531746031746031,
 0.898538961038961,
 0.8519345238095238,
 0.7757352941176471,
 0.8485294117647059,
 0.861328125,
 0.7916666666666666,
 0.751953125,
 0.9278846153846154,
 0.7834821428571429,
 0.90625,
 0.8898809523809523,
 0.9227941176470589,
 0.8577935222672064,
 0.8021255060728746,
 0.8418522267206477,
 0.9296875,
 0.8531746031746031,
 0.8536458333333333,
 0.8828125,
 0.90625,
 0.847419028340081,
 0.9488636363636364,
 0.9027777777777778,
 0.9263392857142857,
 0.8611111111111112,
 0.8568627450980392,
 0.9227941176470589,
 0.9227941176470589,
 0.9013157894736842,
 0.8021255060728746,
 0.8021255060728746,
 0.8674242424242423,
 0.8875,
 0.9453125,
 0.7757936507936508,
 0.8577935222672064,
 0.875]

In [43]:
test_rec

[0.625,
 0.71875,
 0.65625,
 0.59375,
 0.78125,
 0.625,
 0.5625,
 0.5625,
 0.625,
 0.34375,
 0.53125,
 0.71875,
 0.71875,
 0.5,
 0.625,
 0.65625,
 0.46875,
 0.5625,
 0.59375,
 0.53125,
 0.5,
 0.59375,
 0.6875,
 0.59375,
 0.625,
 0.5625,
 0.5625,
 0.65625,
 0.59375,
 0.71875,
 0.71875,
 0.625,
 0.71875,
 0.5,
 0.625,
 0.6875,
 0.71875,
 0.5625,
 0.65625,
 0.6875,
 0.5,
 0.5625,
 0.5625,
 0.53125,
 0.59375,
 0.59375,
 0.71875,
 0.71875,
 0.5625,
 0.625,
 0.65625,
 0.6666666666666666]

In [44]:
test_f1

[0.7010869565217391,
 0.7572368421052632,
 0.7272371714643305,
 0.6616959064327486,
 0.7964285714285715,
 0.6757246376811595,
 0.6901041666666666,
 0.634090909090909,
 0.6523279352226721,
 0.4502253446447509,
 0.6632509386733416,
 0.7751696832579186,
 0.791289592760181,
 0.5485829959514169,
 0.6884057971014493,
 0.7449095022624433,
 0.5658222591362128,
 0.6221590909090909,
 0.6616959064327486,
 0.6239035087719299,
 0.5425101214574899,
 0.6286452810180276,
 0.7526041666666666,
 0.6493421052631578,
 0.6863636363636364,
 0.671875,
 0.6460227272727272,
 0.7143304130162703,
 0.6623904881101376,
 0.7486842105263157,
 0.7772959183673469,
 0.6884057971014493,
 0.7557102628285357,
 0.6183574879227053,
 0.6863636363636364,
 0.7297705314009661,
 0.791289592760181,
 0.6287878787878787,
 0.7272371714643305,
 0.7064777327935222,
 0.5954545454545455,
 0.6460227272727272,
 0.6460227272727272,
 0.598356309650053,
 0.6623904881101376,
 0.6623904881101376,
 0.7642857142857143,
 0.7495847176079735,
 0.666