## A model build using TweetBERT to classify tweet as causal or non-causal 

In [1]:
import pandas as pd
import numpy as np
import spacy 
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification, AutoTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import random
import os
import torch.nn.functional as F
import torch
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
import transformers
from tqdm import tqdm, trange
#from google.colab import drive, files
import io

In [2]:
#uploaded = files.upload()
#data = pd.read_excel(io.BytesIO(uploaded['Causality + hypoglycemia.xlsx']), sheet_name=">5000_samples_")


# data = pd.read_excel("/home/adrian/Downloads/Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")
data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")

  data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")


In [3]:
print("Total count:", data.shape[0])
data = data[data["Causal association"].notnull()]
print("Labeled count:", data.shape[0])

data.head()

Total count: 5456
Labeled count: 5000


Unnamed: 0,id,text,full_text,Intent,Cause,Effect,Causal association,Charline association0=no;1=yes,Remarks
0,908171203029868545,"tonight , I learned my older girl will back he...","tonight , I learned my older girl will back he...",,,,0.0,,
1,1203645589214367745,USER USER I knew diabetes and fibromyalgia wer...,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0.0,,
2,1310596731063525376,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,mS,,,0.0,,
3,1125198453167022085,USER Cheers ! Have one for this diabetic too !,USER Cheers ! Have one for this diabetic too !,mS,,,0.0,,
4,1248600944138268673,USER Additionally the medicines are being char...,USER Additionally the medicines are being char...,,medicines are being charged at MRP,costing much higher,1.0,,


### Interrater-reliabilty measure

In [4]:
from sklearn.metrics import cohen_kappa_score

charline = data[data["Charline association0=no;1=yes"].notnull()]
coder1 = charline["Causal association"].values
coder2 = charline["Charline association0=no;1=yes"]
score = cohen_kappa_score(coder1,coder2)
print('Cohen\'s Kappa:',score)

Cohen's Kappa: 0.823719518166683


### Preprocessing

In [5]:
data["Causal association"].value_counts()

0.0    3720
1.0    1280
Name: Causal association, dtype: int64

In [6]:
# Normalisation for BertTweet
from nltk.tokenize import TweetTokenizer
from emoji import demojize
import re

tokenizer = TweetTokenizer()

# https://huggingface.co/vinai/bertweet-base
def normalizeToken(token):
    lowercased_token = token.lower()
    if token.startswith("@"):
        return "@USER"
    elif lowercased_token.startswith("http") or lowercased_token.startswith("www"):
        return "HTTPURL"
    elif len(token) == 1:
        return demojize(token)
    else:
        if token == "’":
            return "'"
        elif token == "…":
            return "..."
        else:
            return token

def normalizeTweet(tweet):

    tokens = tokenizer.tokenize(tweet.replace("’", "'").replace("…", "..."))
    normTweet = " ".join([normalizeToken(token) for token in tokens])

    normTweet = normTweet.replace("cannot ", "can not ").replace("n't ", " n't ").replace("n 't ", " n't ").replace("ca n't", "can't").replace("ai n't", "ain't")
    normTweet = normTweet.replace("'m ", " 'm ").replace("'re ", " 're ").replace("'s ", " 's ").replace("'ll ", " 'll ").replace("'d ", " 'd ").replace("'ve ", " 've ")
    normTweet = normTweet.replace(" p . m .", "  p.m.") .replace(" p . m ", " p.m ").replace(" a . m .", " a.m.").replace(" a . m ", " a.m ")

    normTweet = re.sub(r",([0-9]{2,4}) , ([0-9]{2,4})", r",\1,\2", normTweet)
    normTweet = re.sub(r"([0-9]{1,3}) / ([0-9]{2,4})", r"\1/\2", normTweet)
    normTweet = re.sub(r"([0-9]{1,3})- ([0-9]{2,4})", r"\1-\2", normTweet)
    
    return " ".join(normTweet.split())

In [7]:
def split_into_sentences(text):
    text = " " + text + "  "
    text = text.replace("\n"," ")
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    sentences = text.split("<stop>")
    #sentences = sentences[:-1]
    sentences = [s.strip() for s in sentences]
    sentences = [s  for s in sentences if s != ""]
    return sentences


def create_training_data(data):
    tweets = []
    causal_labels = []
    
    for i, row in data.iterrows():
#        print("\n")
#        print(row["full_text"])
#        print(row["Intent"], "->", row["Causal association"])
       
        sentences = split_into_sentences(row["full_text"])
        intents = set(str(row["Intent"]).strip().split(";"))
        causes = str(row["Cause"]).strip().split(";")
        effects = str(row["Effect"]).strip().split(";")
#        print("\tintents:", intents)
#        print("\tcauses: '{}'".format(causes))
#        print("\teffects: '{}'".format(effects))
        
        if set({"nan"}) == intents or set({" "}) == intents: # single sentence
#            print("\tA: add => causality: {}".format(row["Causal association"]))
            tweets.append(row["full_text"])
            causal_labels.append(row["Causal association"])

        elif (
             set({"q"}) == intents 
          or set({"joke"}) == intents 
          or set({"q", "joke"}) == intents
          or set({"joke", "mS"}) == intents 
          or set({"neg"}) == intents 
          or set({"neg", "msS"}) == intents
          or set({"neg", "mS"}) == intents
          or set({"neg", "msS", "mE"}) == intents
          or set({"q", "joke", "mS"}) == intents
          or set({"q", "msS", "neg"}) == intents
          or set({"neg", "mC"}) == intents
          or set({"mC", "joke", "msS"}) == intents
          or set({"joke", "mE"}) == intents
        ):
#            print("\tB ignore")
            continue
        elif (  
             set({"mS"}) == intents # multiple sentences (possible that cause and effect in different sentences -> ignore)
          or set({"q", "mS"}) == intents # multiple sentences or question
          or set({"mS", "mE"}) == intents
          or set({"mC", "mS"}) == intents
          or set({"mC", "mS", "mE"}) == intents
          or set({"q", "mC", "mS"}) == intents
          or set({"q", "mC", "mS", "mE"}) == intents
            
        ):
            for sent in sentences:
#                print(sent)
                if sent[-1] != "?": # ignore questions
#                    print("\tC: add, causality => 0")
                    tweets.append(sent)
                    causal_labels.append(0)  
#                else:
#                    print("\tD: ignore")
        elif (
            set({"msS"}) == intents # multiple sentences with cause and effect in single sentence
         or set({"q", "msS"}) == intents # msS and a question
         or set({"msS", "mE"}) == intents # msS with several effects
         or set({"mC", "msS"}) == intents
         or set({"mE"}) == intents # multiple effects
         or set({"mC"}) == intents # multiple causes
         or set({"mC", "msS", "mE"}) == intents
         or set({"mC", "mE"}) == intents
         or set({"q", "mC", "mE"}) == intents   
         or set({"q", "mC", "msS"}) == intents
        ):

            if row["Causal association"] != 1: #TEST
                print(sentences)
                print("1) ERROR: Causal association should be 1 !!!!")      
                print()
        
            for sent in sentences:
#                print("sub sent:", sent)
                if sent[-1] != "?": # ignore question
                    
                    existCause = False
                    for cause in causes:
                        if cause in sent:
#                            print("Cause: <{}> in sent".format(cause))
                            existCause = True
                    
                    existEffect = False
                    for effect in effects:
                        if effect in sent:
#                            print("Effect: <{}> in sent".format(effect))
                            existEffect = True
                            
                    if existCause and existEffect:
                        tweets.append(sent)
                        causal_labels.append(row["Causal association"])
#                        print("E: add with Cause + effect => association: {}".format(row["Causal association"]))
                    else:
                        tweets.append(sent)
                        causal_labels.append(0)
#                        print("F: not both C + E in sentence, association: 0")
#                else:
#                    print("H: question in sentence")
            if row["Causal association"] == 0:
                print(sentences)
                print("H: should not have causality == 0")
#        else:
#            print("END: should not happen")

    
    return pd.DataFrame({"tweet" : tweets, "Causal association" : causal_labels})

trainingData = create_training_data(data)
print("N sentences:", trainingData.shape)
trainingData = trainingData[trainingData["tweet"].str.split(" ").str.len() > 3] # keep only sentence with more than 3 tokens
print("N sentences with > 3 words:", trainingData.shape)

N sentences: (9779, 2)
N sentences with > 3 words: (8235, 2)


In [8]:
# let's print a few example of our cleaned and normalized traingin dataset
trainingData.head()

Unnamed: 0,tweet,Causal association
0,"tonight , I learned my older girl will back he...",0.0
1,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,0.0
2,I'm a trans woman .,0.0
3,"Both of us could use a world where "" brave and...",0.0
4,"Make a world where people can just be , withou...",0.0


In [9]:
trainingData["Causal association"].value_counts()

0.0    7218
1.0    1017
Name: Causal association, dtype: int64

### Training

In [10]:
# text = trainingData["tweet"].map(normalizeTweet).values.tolist()
# labels = trainingData["Causal association"].values.tolist()
# # first split the data into traingin and testing label in the ratio of 80:20
# train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2)
# # split the new training data (80% of actual data) to get train and validation set
# train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2)



# print("Train: {}".format(len(train_texts)))
# print("Val: {}".format(len(val_texts)))
# print("Test: {}".format(len(test_texts)))


In [11]:
## when not straitifyign the data: 

text = trainingData["tweet"].map(normalizeTweet).values.tolist()
labels = trainingData["Causal association"].values.tolist()
# first split the data into training and testing label in the ratio of 80:20
train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2)
# split the new training data (80% of actual data) to get train and validation set
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2)

data_count_info = pd.Series(labels).value_counts(normalize=True)
train_count_info = pd.Series(train_labels).value_counts(normalize=True)
val_count_info = pd.Series(val_labels).value_counts(normalize=True)
test_count_info = pd.Series(test_labels).value_counts(normalize=True)

# for class-imbalanced dataset, the class weight for a ith class
# to be specified for balancing in the loss function is given by:
# weight[i] = num_samples / (num_classes * num_samples[i])
# since train_count_info obtained above has fraction of samples
# for ith class, hence the corresponding weight calculation is:
class_weight = (1/train_count_info)/len(train_count_info)

print("All: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(labels), *data_count_info.round(4).to_list()))
print("Train: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(train_labels), *train_count_info.round(4).to_list()))
print("Val: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(val_labels), *val_count_info.round(4).to_list()))
print("Test: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(test_labels), *test_count_info.round(4).to_list()))
print("Balancing class wts: for 0 = {}, for 1 = {}".format(
    *class_weight.round(4).to_list()))

All: Count = 8235, % of 0 = 0.8765, % of 1 = 0.1235
Train: Count = 5270, % of 0 = 0.875, % of 1 = 0.125
Val: Count = 1318, % of 0 = 0.8672, % of 1 = 0.1328
Test: Count = 1647, % of 0 = 0.8889, % of 1 = 0.1111
Balancing class wts: for 0 = 0.5715, for 1 = 3.9985


In [12]:
####################### Stratified splits ####################

text = trainingData["tweet"].map(normalizeTweet).values.tolist()
labels = trainingData["Causal association"].values.tolist()
# first split the data into training and testing label in the ratio of 80:20
train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2, stratify=labels)
# split the new training data (80% of actual data) to get train and validation set
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2, stratify=train_labels)



data_count_info = pd.Series(labels).value_counts(normalize=True)
train_count_info = pd.Series(train_labels).value_counts(normalize=True)
val_count_info = pd.Series(val_labels).value_counts(normalize=True)
test_count_info = pd.Series(test_labels).value_counts(normalize=True)

# for class-imbalanced dataset, the class weight for a ith class
# to be specified for balancing in the loss function is given by:
# weight[i] = num_samples / (num_classes * num_samples[i])
# since train_count_info obtained above has fraction of samples
# for ith class, hence the corresponding weight calculation is:
class_weight = (1/train_count_info)/len(train_count_info)

print("All: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(labels), *data_count_info.round(4).to_list()))
print("Train: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(train_labels), *train_count_info.round(4).to_list()))
print("Val: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(val_labels), *val_count_info.round(4).to_list()))
print("Test: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(test_labels), *test_count_info.round(4).to_list()))
print("Balancing class wts: for 0 = {}, for 1 = {}".format(
    *class_weight.round(4).to_list()))

All: Count = 8235, % of 0 = 0.8765, % of 1 = 0.1235
Train: Count = 5270, % of 0 = 0.8765, % of 1 = 0.1235
Val: Count = 1318, % of 0 = 0.8763, % of 1 = 0.1237
Test: Count = 1647, % of 0 = 0.8767, % of 1 = 0.1233
Balancing class wts: for 0 = 0.5705, for 1 = 4.0476


In [13]:
# train_texts[1]

In [14]:
# Transform labels + encodings into Pytorch DataSet object (including __len__, __getitem__)
class TweetDataSet(torch.utils.data.Dataset):
    def __init__(self, text, labels, tokenizer):
        self.text = text
        self.labels = labels
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        inputs = self.tokenizer(self.text, padding=True, truncation=True, return_token_type_ids=True)
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]
        return {
                "input_ids" : torch.tensor(ids[idx], dtype=torch.long)
              , "attention_mask" : torch.tensor(mask[idx], dtype=torch.long)
              , "token_type_ids" : torch.tensor(token_type_ids[idx], dtype=torch.long)
              , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
        }      

    def __len__(self):
        return len(self.labels)

    
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")

train_dataset = TweetDataSet(train_texts, train_labels, tokenizer)
val_dataset = TweetDataSet(val_texts, val_labels, tokenizer)
test_dataset = TweetDataSet(test_texts, test_labels, tokenizer)
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))

# put data to batches (iterables)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


5270
1318
1647


In [15]:
# !jupyter nbextension enable --py widgetsnbextension

In [16]:
# # from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
# # from transformers import AutoModelForSequenceClassification
# from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef

# def compute_metrics(pred, labels):
#     #labels = pred.label_ids
#     #preds = pred.predictions.argmax(-1)
#     precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='binary')
#     acc = accuracy_score(labels, pred)
#     return {
#         'accuracy': acc,
#         'f1': f1,
#         'precision': precision,
#         'recall': recall
#     }



# class CausalityBERT(torch.nn.Module):
#     """ Model Bert"""
#     def __init__(self):
#         super(CausalityBERT, self).__init__()
#         self.num_labels = 2
#         self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
#         self.dropout = torch.nn.Dropout(0.3)
#         self.linear = torch.nn.Linear(768, self.num_labels)
#         # softmax layer missing ? -> Vivek
        
#     def forward(self, input_ids, attention_mask, token_type_ids):
#         _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False)
#         output_2 = self.dropout(output_1)
#         logits = self.linear(output_2)
#         return logits


# ## Model parameters
# batchsize_train = 16
# lr = 5e-5
# adam_eps = 1e-8
# epochs = 3 
# num_warmup_steps = 0
# num_training_steps = len(train_loader)*epochs

# # Store our loss and accuracy for plotting
# train_loss_set = []
# learning_rate = []


# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# #model = AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base")
# model = CausalityBERT()
# model.to(device)

# # fine-tune only the task-specific parameters -> Vivek? 
# for param in model.bert.parameters():
#     param.requires_grad = False
    
# model.to(device)
# model.train() # set model to training mode


# optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
# scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) # scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0; after a warmup period durnig which it increases linearly from 0 to the initial lr set in the optimizer
# loss_fn = CrossEntropyLoss()

# for epoch in trange(1, epochs+1, desc='Epoch'):
#     print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")
    
#     #calculate total loss for this epoch
#     batch_loss = 0
    
#     for batch in tqdm(train_loader):
#         #print("batch:", batch)
#         optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
#         input_ids = batch['input_ids'].to(device)
#         attention_mask = batch['attention_mask'].to(device)
#         token_type_ids = batch["token_type_ids"].to(device)
#         labels = batch['labels'].to(device)
#         #logits = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # forward pass
#         logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        
        
        
        
#         loss = loss_fn(logits, labels)
#         print("loss:", loss)
#         #loss = outputs[0]
#         loss.backward() # backward pass
#         optim.step()    # update parameters and take a step up using the computed gradient
#         scheduler.step()# update learning rate scheduler
#         batch_loss += loss.item() # update tracking variables
        
#     avg_train_loss = batch_loss / len(train_loader) # calculate avg loss over training data

#     # store the current learning rate
#     for param_group in optim.param_groups:
#         print("\n\tCurrent Learning rate: ", param_group['lr'])
#         learning_rate.append(param_group['lr'])
    
#     train_loss_set.append(avg_train_loss)
#     print(F'\n\tAverage Training loss: {avg_train_loss}')

    
#     ## ---- Validation ------
#     model.eval() # put model in evaluation mode for validation set
    
#     eval_accuracy, eval_mcc_accuracy, nb_eval_steps = 0, 0, 0 # Tracking variables
#     val_accuracy = []
#     val_loss = []
#     val_acc = []
#     val_prec = []
#     val_rec = []
#     val_f1 = []
    
    
#     # Evaluate data for one epoch
#     for batch in tqdm(validation_loader):
#         batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
#         b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
        
#         with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
#           #  logits = model(b_inputs_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids) # forward pass, calculates logit predictions
#           logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions
            
#         loss = loss_fn(logits, b_labels)
#         val_loss.append(loss.item())
        
#         # move logits and labels to CPU
#         logits = logits.detach().to('cpu').numpy()
#         label_ids = b_labels.to('cpu').numpy()
        
#         pred_flat = np.argmax(logits, axis=1).flatten() # convert logits to list of predicted labels
#         labels_flat = label_ids.flatten()
#         eval_accuracy += accuracy_score(labels_flat, pred_flat)
#         eval_mcc_accrucay += matthews_corrcoef(labels_flat, pred_flat)        
#         metrics = compute_metrics(pred_flat, labels_flat)
#         val_acc.append(metrics["accuracy"])
#         val_prec.append(metrics["precision"])
#         val_rec.append(metrics["recall"])
#         val_f1.append(metrics["f1"])
#         nb_eval_steps += 1
        
#     print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation MCC Accuracy: {eval_mcc_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation loss: {np.mean(val_loss)}')
#     print(F'\n\tValidation acc: {np.mean(val_acc)}')
#     print(F'\n\tValidation prec: {np.mean(val_prec)}')
#     print(F'\n\tValidation rec: {np.mean(val_rec)}')
#     print(F'\n\tValidation f1: {np.mean(val_f1)}')

    


In [17]:
# from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
# from transformers import AutoModelForSequenceClassification

## we are measuring weighted metrics - as our dataset is unbalanced 
# Calculate metrics for each label, and find their average weighted by support
#(the number of true instances for each label). 
#This alters ‘macro’ to account for label imbalance; 
# it can result in an F-score that is not between precision and recall.


from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef

def compute_metrics(pred, labels):
    #labels = pred.label_ids
    #preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='weighted')
    acc = accuracy_score(labels, pred)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }



class CausalityBERT(torch.nn.Module):
    """ Model Bert"""
    def __init__(self):
        super(CausalityBERT, self).__init__()
        self.num_labels = 2
        self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
        self.dropout = torch.nn.Dropout(0.3)
        self.linear1 = torch.nn.Linear(768, 256)
        self.linear2 = torch.nn.Linear(256, self.num_labels)
        self.softmax = torch.nn.Softmax(-1)
        
    def forward(self, input_ids, attention_mask, token_type_ids):
        _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False) # if output 1 is our cls token        
        output_2 = self.dropout(output_1)
        output_3 = self.linear1(output_2)  
        output_4 = self.dropout(output_3)
        output_5 = self.linear2(output_4)
        # cross entory will take care of the logits - we don't need if we are usign cross entropy for loss function 
        # if doing yourself - use nll loss and logSoftmax 
#         logit = self.softmax(output_5)
        return output_5



    


## Model parameters


In [18]:
batchsize_train = 16
lr = 1e-5
adam_eps = 1e-8
epochs = 35
num_warmup_steps = 0
num_training_steps = len(train_loader)*epochs

In [19]:


# Store our loss and accuracy for plotting : where is accuracy : it is loss an dlearning rate 
train_loss_set = []
learning_rate = []


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = CausalityBERT()
model.to(device)

# fine-tune only the task-specific parameters -> Vivek? 
## can we just update last layer of BERT 
for param in model.bert.parameters():
    param.requires_grad = False


optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) # scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0; after a warmup period durnig which it increases linearly from 0 to the initial lr set in the optimizer
# loss_fn = CrossEntropyLoss()

## penalising more for class with less number of exaplmes 
loss_fn = CrossEntropyLoss(torch.tensor(class_weight.to_list()).to(device))


You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.3.output.dense.bias', 'roberta.encoder.layer.5.attention.self.query.weight', 'roberta.encoder.layer.4.attention.self.key.weight', 'roberta.encoder.layer.8.intermediate.dense.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.dense.bias', 'roberta.encoder.layer.5.attention.output.dense.weight', 'roberta.encoder.layer.2.attention.self.key.weight', 'roberta.encoder.layer.2.attention.output.dense.bias', 'roberta.encoder.layer.9.attention.output.LayerNorm.bias', 'roberta.encoder.layer.6.attention.output.dense.weight', 'roberta.encoder.layer.2.attention.self.query.weight', 'roberta.encoder.layer.1.intermediate.dense.weight', 'roberta.encoder.layer.4.output.dense.weight'

In [20]:



for epoch in trange(1, epochs+1, desc='Epoch'):
    print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")
    
    #calculate total loss for this epoch
#     epoch_loss = 0
    
    ########### training eval metrics #############################
    tr_accuracy, tr_mcc_accuracy, nb_tr_steps = 0, 0, 0 # Tracking variables
    train_accuracy = []
    train_loss = []
    train_acc = []
    train_prec = []
    train_rec = []
    train_f1 = []
    
    ###################################################
    
    for batch in tqdm(train_loader):
        #print("batch:", batch)
        optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch["token_type_ids"].to(device)
        labels = batch['labels'].to(device)
        #logits = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # forward pass
#         outputs = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        ###########################################################################
        model.train()
        ## this is output_5 based on our model defined above 
        logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        #############################################################################
        loss = loss_fn(logits, labels)
        print("loss:", loss)
        #loss = outputs[0]
        loss.backward() # backward pass
        optim.step()    # update parameters and take a step up using the computed gradient
        scheduler.step()# update learning rate scheduler
#         epoch_loss += loss.item() # update tracking variables
        train_loss.append(loss.item())
        
    
    ############# Training Accuracy Measure ##########################################
#         loss = loss_fn(logits, b_labels)
#         val_loss.append(loss.item())

        # move logits and labels to CPU
        logits = logits.detach().to('cpu').numpy()
        label_ids = labels.to('cpu').numpy()

        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()

# #         tr_accuracy += accuracy_score(labels_flat, pred_flat) # this is same as metric accuracy 
#         tr_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  


        metrics = compute_metrics(pred_flat, labels_flat)
#         print(metrics)
        
        
#         train_loss.append(loss)
        train_acc.append(metrics["accuracy"])
        train_prec.append(metrics["precision"])
        train_rec.append(metrics["recall"])
        train_f1.append(metrics["f1"])
        
#         batch_loss
#         nb_tr_steps += 1
        
#     batch_loss = np.mean(train_loss)
#     train_loss_set.append(batch_loss)
        
    
#     print(F'\n\tTraining Accuracy: {tr_accuracy / nb_tr_steps}') # accuracy is calculated twice 
#     print(F'\n\tTraining MCC Accuracy: {tr_mcc_accuracy / nb_tr_steps}') # eval M
    
#     train_loss_set.append(batch_loss) # it should be either loss or batch_loss
    
    
#     train_loss = train_loss.to('cpu').numpy()
    print(F'\n\tTrain loss: {np.mean(train_loss)}')
    print(F'\n\ttrain acc: {np.mean(train_acc)}')
    print(F'\n\ttraining prec: {np.mean(train_prec)}')
    print(F'\n\ttraining rec: {np.mean(train_rec)}')
    print(F'\n\ttraining f1: {np.mean(train_f1)}')
    
    
    ###################################################################################
    #     avg_train_loss = batch_loss / len(train_loader) # calculate avg loss over training data : we don't need it as you did it for each sample
    #     train_loss_set.append(avg_train_loss)
    #     print(F'\n\tAverage Training loss: {avg_train_loss}')
    
#     train_loss_set.append(train_loss)
#     print(F'\n\tAverage Training loss per epoch: {train_loss}')


    # store the current learning rate
    for param_group in optim.param_groups:
        print("\n\tCurrent Learning rate: ", param_group['lr'])
        learning_rate.append(param_group['lr'])
    


    
    ## ---- Validation ------
#     model.eval() # put model in evaluation mode for validation set
    
    eval_accuracy, eval_mcc_accuracy, nb_eval_steps = 0, 0, 0 # Tracking variables
    val_accuracy = []
    val_loss = []
    val_acc = []
    val_prec = []
    val_rec = []
    val_f1 = []
    
    
    # Evaluate data for one epoch
    for batch in tqdm(validation_loader):
        batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
        b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
        
        with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
          #  logits = model(b_inputs_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids) # forward pass, calculates logit predictions
#             outputs = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
            ##################################################################################
            model.eval()
            logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 

                        
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())
        
        # move logits and labels to CPU
        logits = logits.detach().to('cpu').numpy()
        label_ids = b_labels.to('cpu').numpy()
        
        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()
        
#         eval_accuracy += accuracy_score(labels_flat, pred_flat)
#         eval_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  
        
        
        metrics = compute_metrics(pred_flat, labels_flat)
        val_acc.append(metrics["accuracy"])
        val_prec.append(metrics["precision"])
        val_rec.append(metrics["recall"])
        val_f1.append(metrics["f1"])
#         nb_eval_steps += 1
        
#     print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation MCC Accuracy: {eval_mcc_accuracy / nb_eval_steps}') # eval M
    
#     val_loss = val_loss.to('cpu').numpy()
    print(F'\n\tValidation loss: {np.mean(val_loss)}')
    print(F'\n\tValidation acc: {np.mean(val_acc)}')
    print(F'\n\tValidation prec: {np.mean(val_prec)}')
    print(F'\n\tValidation rec: {np.mean(val_rec)}')
    print(F'\n\tValidation f1: {np.mean(val_f1)}')


Epoch:   0%|          | 0/35 [00:00<?, ?it/s]
  0%|          | 0/83 [00:00<?, ?it/s][A



  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)


loss: 


  1%|          | 1/83 [00:23<32:21, 23.68s/it][A

tensor(0.7116, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:46<31:19, 23.21s/it][A

tensor(0.7131, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:08<30:10, 22.64s/it][A

tensor(0.7321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:30<29:18, 22.26s/it][A

tensor(0.7346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:52<28:46, 22.14s/it][A

tensor(0.7106, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:14<28:18, 22.06s/it][A

tensor(0.6811, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:36<28:07, 22.21s/it][A

tensor(0.6963, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:58<27:48, 22.25s/it][A

tensor(0.7171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:20<27:18, 22.14s/it][A

tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:42<26:50, 22.06s/it][A

tensor(0.6779, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:04<26:23, 21.99s/it][A

tensor(0.6993, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:26<25:54, 21.90s/it][A

tensor(0.6807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:48<25:34, 21.92s/it][A

tensor(0.6978, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:10<25:13, 21.93s/it][A

tensor(0.6759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:31<24:47, 21.88s/it][A

tensor(0.6789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:53<24:23, 21.85s/it][A

tensor(0.7127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:15<23:56, 21.77s/it][A

tensor(0.7027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:36<23:30, 21.69s/it][A

tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:58<23:07, 21.68s/it][A

tensor(0.7468, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:19<22:43, 21.64s/it][A

tensor(0.6892, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:41<22:20, 21.62s/it][A

tensor(0.6728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [08:03<21:59, 21.63s/it][A

tensor(0.6755, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:24<21:35, 21.60s/it][A

tensor(0.6960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:46<21:12, 21.56s/it][A

tensor(0.6818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:07<20:48, 21.52s/it][A

tensor(0.6458, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:29<20:28, 21.55s/it][A

tensor(0.6998, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:50<20:05, 21.52s/it][A

tensor(0.6931, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:12<19:46, 21.57s/it][A

tensor(0.6865, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:34<19:26, 21.60s/it][A

tensor(0.7395, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:55<19:06, 21.63s/it][A

tensor(0.7343, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:17<18:44, 21.63s/it][A

tensor(0.7095, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:38<18:21, 21.60s/it][A

tensor(0.7011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [12:00<17:59, 21.60s/it][A

tensor(0.7180, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:21<17:37, 21.57s/it][A

tensor(0.7410, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:43<17:16, 21.60s/it][A

tensor(0.7047, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [13:05<16:56, 21.62s/it][A

tensor(0.6979, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:26<16:33, 21.59s/it][A

tensor(0.6665, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:48<16:11, 21.59s/it][A

tensor(0.6899, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:10<15:50, 21.60s/it][A

tensor(0.6979, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:31<15:30, 21.63s/it][A

tensor(0.7319, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:53<15:08, 21.63s/it][A

tensor(0.7113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:15<14:47, 21.65s/it][A

tensor(0.6973, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:36<14:26, 21.65s/it][A

tensor(0.7078, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:58<14:04, 21.65s/it][A

tensor(0.6993, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:20<13:42, 21.65s/it][A

tensor(0.6830, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:41<13:21, 21.67s/it][A

tensor(0.7208, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [17:03<12:59, 21.66s/it][A

tensor(0.7278, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:25<12:39, 21.69s/it][A

tensor(0.7449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:47<12:19, 21.75s/it][A

tensor(0.6995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [18:08<11:58, 21.76s/it][A

tensor(0.6714, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:30<11:38, 21.81s/it][A

tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:52<11:16, 21.83s/it][A

tensor(0.7080, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:14<10:55, 21.84s/it][A

tensor(0.6954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:36<10:31, 21.79s/it][A

tensor(0.7005, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:57<10:07, 21.70s/it][A

tensor(0.7397, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [20:18<09:42, 21.57s/it][A

loss: tensor(0.6835, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [20:40<09:18, 21.49s/it][A

loss: tensor(0.6687, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [21:01<08:56, 21.46s/it][A

tensor(0.6959, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [21:22<08:34, 21.42s/it][A

loss: tensor(0.7316, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [21:44<08:13, 21.48s/it][A

loss: tensor(0.7241, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [22:06<07:53, 21.52s/it][A

loss: tensor(0.7110, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [22:27<07:33, 21.62s/it][A

loss: tensor(0.6998, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [22:49<07:13, 21.67s/it][A

loss: tensor(0.6831, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:11<06:52, 21.72s/it][A

tensor(0.6829, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:33<06:30, 21.68s/it][A

tensor(0.7084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:54<06:07, 21.64s/it][A

tensor(0.7170, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:16<05:46, 21.64s/it][A

tensor(0.7109, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:37<05:23, 21.58s/it][A

tensor(0.7057, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:59<05:01, 21.53s/it][A

tensor(0.7299, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:20<04:39, 21.50s/it][A

tensor(0.6776, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:42<04:17, 21.50s/it][A

tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [26:03<03:56, 21.53s/it][A

tensor(0.7421, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:25<03:36, 21.60s/it][A

tensor(0.7219, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:47<03:14, 21.62s/it][A

tensor(0.6853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [27:08<02:52, 21.61s/it][A

tensor(0.7286, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:30<02:31, 21.61s/it][A

tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:52<02:09, 21.64s/it][A

tensor(0.7133, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:13<01:48, 21.68s/it][A

tensor(0.6656, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:35<01:26, 21.73s/it][A

tensor(0.6950, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:57<01:05, 21.78s/it][A

tensor(0.6863, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:19<00:43, 21.79s/it][A

tensor(0.6823, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:41<00:21, 21.82s/it][A

tensor(0.6576, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:48<00:00, 21.55s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7042, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.7011539526732571

	train acc: 0.5440169769989047

	training prec: 0.7841020621257179

	training rec: 0.5440169769989047

	training f1: 0.6204530012039815

	Current Learning rate:  9.714285714285715e-06



  2%|▏         | 1/42 [00:02<01:57,  2.87s/it][A
  5%|▍         | 2/42 [00:05<01:53,  2.83s/it][A
  7%|▋         | 3/42 [00:08<01:50,  2.84s/it][A
 10%|▉         | 4/42 [00:11<01:47,  2.82s/it][A
 12%|█▏        | 5/42 [00:14<01:44,  2.83s/it][A
 14%|█▍        | 6/42 [00:17<01:42,  2.85s/it][A
 17%|█▋        | 7/42 [00:19<01:38,  2.83s/it][A
 19%|█▉        | 8/42 [00:22<01:36,  2.83s/it][A
 21%|██▏       | 9/42 [00:25<01:33,  2.84s/it][A
 24%|██▍       | 10/42 [00:28<01:30,  2.84s/it][A
 26%|██▌       | 11/42 [00:31<01:27,  2.83s/it][A
 29%|██▊       | 12/42 [00:33<01:23,  2.80s/it][A
 31%|███       | 13/42 [00:36<01:20,  2.77s/it][A
 33%|███▎      | 14/42 [00:39<01:17,  2.77s/it][A
 36%|███▌      | 15/42 [00:42<01:14,  2.77s/it][A
 38%|███▊      | 16/42 [00:44<01:11,  2.76s/it][A
 40%|████      | 17/42 [00:47<01:09,  2.77s/it][A
 43%|████▎     | 18/42 [00:50<01:06,  2.77s/it][A
 45%|████▌     | 19/42 [00:53<01:03,  2.75s/it][A
 48%|████▊     | 20/42 [00:55<01:00,  2


	Validation loss: 0.6891052964187804

	Validation acc: 0.6939484126984127

	Validation prec: 0.8255954772972949

	Validation rec: 0.6939484126984127

	Validation f1: 0.7425054435609219
loss: 


  1%|          | 1/83 [00:21<30:03, 22.00s/it][A

tensor(0.7176, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:33, 21.90s/it][A

tensor(0.7089, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:05<29:11, 21.89s/it][A

tensor(0.7481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:27<28:47, 21.86s/it][A

tensor(0.7209, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:49<28:24, 21.85s/it][A

tensor(0.7363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:11<28:03, 21.86s/it][A

tensor(0.6911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:33<27:42, 21.87s/it][A

tensor(0.7031, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:55<27:25, 21.94s/it][A

tensor(0.7000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:17<27:04, 21.96s/it][A

tensor(0.7069, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:39<26:42, 21.95s/it][A

tensor(0.7132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:01<26:22, 21.98s/it][A

tensor(0.6822, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:23<25:57, 21.94s/it][A

tensor(0.7098, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:44<25:34, 21.92s/it][A

tensor(0.6761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:06<25:10, 21.89s/it][A

tensor(0.6897, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:28<24:43, 21.82s/it][A

tensor(0.7072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:49<24:16, 21.74s/it][A

tensor(0.7017, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:11<23:52, 21.70s/it][A

tensor(0.7174, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:33<23:25, 21.63s/it][A

tensor(0.6873, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:54<22:58, 21.53s/it][A

tensor(0.6620, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:15<22:34, 21.51s/it][A

tensor(0.7015, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:37<22:13, 21.51s/it][A

tensor(0.7348, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:58<21:52, 21.51s/it][A

tensor(0.7262, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:20<21:29, 21.50s/it][A

tensor(0.7218, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:41<21:05, 21.44s/it][A

tensor(0.6964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:02<20:41, 21.41s/it][A

tensor(0.7034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:24<20:19, 21.40s/it][A

tensor(0.7207, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:45<19:59, 21.43s/it][A

tensor(0.6824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:07<19:35, 21.38s/it][A

tensor(0.7008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:28<19:18, 21.45s/it][A

tensor(0.7145, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:50<18:57, 21.47s/it][A

tensor(0.6990, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:11<18:36, 21.46s/it][A

tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:33<18:15, 21.48s/it][A

tensor(0.6862, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:54<17:56, 21.54s/it][A

tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:17<17:49, 21.83s/it][A

tensor(0.6843, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:39<17:32, 21.93s/it][A

tensor(0.6881, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [13:01<17:10, 21.92s/it][A

tensor(0.7203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:23<16:46, 21.89s/it][A

tensor(0.6353, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:44<16:22, 21.84s/it][A

tensor(0.6854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:06<15:59, 21.80s/it][A

tensor(0.6821, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:28<15:36, 21.79s/it][A

tensor(0.7066, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:50<15:14, 21.77s/it][A

tensor(0.7267, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:11<14:52, 21.77s/it][A

tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:33<14:30, 21.77s/it][A

tensor(0.6714, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:55<14:09, 21.78s/it][A

tensor(0.7270, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:17<13:47, 21.77s/it][A

tensor(0.6694, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:39<13:25, 21.78s/it][A

tensor(0.7127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [17:00<13:05, 21.81s/it][A

tensor(0.7012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:22<12:43, 21.83s/it][A

tensor(0.6824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:44<12:21, 21.82s/it][A

tensor(0.6917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [18:06<11:59, 21.79s/it][A

tensor(0.7410, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:28<11:38, 21.83s/it][A

tensor(0.6762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:50<11:20, 21.96s/it][A

tensor(0.7543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:12<11:01, 22.06s/it][A

tensor(0.6676, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:35<10:41, 22.12s/it][A

tensor(0.6973, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:57<10:20, 22.16s/it][A

tensor(0.6717, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [20:19<09:55, 22.06s/it][A

loss: tensor(0.7134, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6606, device='cuda:0', grad_fn=<NllLossBackward>)


 69%|██████▊   | 57/83 [20:40<09:31, 22.00s/it][A
 70%|██████▉   | 58/83 [21:02<09:08, 21.93s/it][A

loss: tensor(0.7333, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [21:24<08:44, 21.87s/it][A

loss: tensor(0.6829, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:46<08:23, 21.88s/it][A

tensor(0.7673, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [22:08<08:01, 21.90s/it][A

tensor(0.6968, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:30<07:39, 21.90s/it][A

tensor(0.6975, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:52<07:18, 21.92s/it][A

tensor(0.7382, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:14<06:56, 21.91s/it][A

tensor(0.6751, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:35<06:34, 21.91s/it][A

tensor(0.6878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:57<06:12, 21.94s/it][A

tensor(0.6812, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:20<05:51, 22.00s/it][A

tensor(0.7432, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:42<05:29, 22.00s/it][A

tensor(0.7259, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [25:04<05:08, 22.02s/it][A

tensor(0.6891, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:26<04:45, 21.99s/it][A

tensor(0.7378, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:48<04:23, 21.99s/it][A

tensor(0.6906, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [26:10<04:02, 22.04s/it][A

tensor(0.6974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:32<03:40, 22.06s/it][A

tensor(0.6998, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:54<03:18, 22.05s/it][A

tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [27:16<02:56, 22.05s/it][A

tensor(0.6800, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:38<02:34, 22.06s/it][A

tensor(0.7105, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [28:00<02:11, 22.00s/it][A

tensor(0.7132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:22<01:49, 21.99s/it][A

tensor(0.7251, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [28:44<01:27, 21.95s/it][A

loss: tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [29:05<01:05, 21.91s/it][A

loss: tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [29:28<00:44, 22.01s/it][A

loss: tensor(0.6722, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [29:50<00:21, 21.99s/it][A

loss: tensor(0.6695, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:57<00:00, 21.66s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.7013868560273964

	train acc: 0.5476108981380066

	training prec: 0.7852849497474153

	training rec: 0.5476108981380066

	training f1: 0.6256536685156806

	Current Learning rate:  9.42857142857143e-06


  _warn_prf(average, modifier, msg_start, len(result))

  2%|▏         | 1/42 [00:02<01:53,  2.76s/it][A
  5%|▍         | 2/42 [00:05<01:48,  2.72s/it][A
  7%|▋         | 3/42 [00:08<01:47,  2.77s/it][A
 10%|▉         | 4/42 [00:11<01:44,  2.76s/it][A
 12%|█▏        | 5/42 [00:13<01:40,  2.73s/it][A
 14%|█▍        | 6/42 [00:16<01:39,  2.75s/it][A
 17%|█▋        | 7/42 [00:19<01:37,  2.77s/it][A
 19%|█▉        | 8/42 [00:22<01:34,  2.77s/it][A
 21%|██▏       | 9/42 [00:24<01:30,  2.75s/it][A
 24%|██▍       | 10/42 [00:27<01:28,  2.76s/it][A
 26%|██▌       | 11/42 [00:30<01:25,  2.77s/it][A
 29%|██▊       | 12/42 [00:33<01:22,  2.76s/it][A
 31%|███       | 13/42 [00:35<01:19,  2.75s/it][A
 33%|███▎      | 14/42 [00:38<01:16,  2.72s/it][A
 36%|███▌      | 15/42 [00:41<01:13,  2.72s/it][A
 38%|███▊      | 16/42 [00:43<01:10,  2.72s/it][A
 40%|████      | 17/42 [00:46<01:07,  2.71s/it][A
 43%|████▎     | 18/42 [00:49<01:05,  2.72s/it][A
 45%|████▌     | 19/42 [00:52<01:02


	Validation loss: 0.6813950268995195

	Validation acc: 0.8772321428571429

	Validation prec: 0.7757573984735023

	Validation rec: 0.8772321428571429

	Validation f1: 0.8224298904310859



  1%|          | 1/83 [00:21<30:00, 21.96s/it][A

loss: tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:43<29:33, 21.89s/it][A

loss: tensor(0.7246, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:05<29:11, 21.90s/it][A

loss: tensor(0.7577, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:27<28:46, 21.86s/it][A

loss: tensor(0.6917, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:49<28:25, 21.87s/it][A

loss: tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:11<28:02, 21.85s/it][A

tensor(0.7045, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:32<27:36, 21.80s/it][A

tensor(0.7429, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:54<27:10, 21.73s/it][A

tensor(0.6647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:16<26:43, 21.67s/it][A

tensor(0.7065, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:37<26:20, 21.65s/it][A

tensor(0.7305, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:59<25:56, 21.62s/it][A

tensor(0.6943, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:20<25:36, 21.65s/it][A

tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:42<25:15, 21.65s/it][A

tensor(0.6792, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [05:04<25:03, 21.79s/it][A

tensor(0.7049, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:26<24:48, 21.89s/it][A

tensor(0.6874, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:48<24:30, 21.95s/it][A

tensor(0.6806, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:11<24:12, 22.01s/it][A

tensor(0.6307, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:33<23:53, 22.05s/it][A

tensor(0.6985, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:55<23:30, 22.04s/it][A

tensor(0.6461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:17<23:07, 22.02s/it][A

tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:38<22:41, 21.96s/it][A

tensor(0.6945, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [08:00<22:19, 21.96s/it][A

tensor(0.7151, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:22<21:59, 21.99s/it][A

tensor(0.6672, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:44<21:37, 21.98s/it][A

tensor(0.6718, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:07<21:16, 22.01s/it][A

tensor(0.6761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:29<20:55, 22.03s/it][A

tensor(0.6943, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:51<20:32, 22.00s/it][A

tensor(0.6797, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:13<20:10, 22.00s/it][A

tensor(0.6654, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:35<19:48, 22.01s/it][A

tensor(0.7041, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:56<19:24, 21.97s/it][A

tensor(0.6887, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:18<19:00, 21.93s/it][A

tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:40<18:34, 21.86s/it][A

tensor(0.6757, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [12:02<18:09, 21.79s/it][A

tensor(0.7340, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:23<17:46, 21.77s/it][A

tensor(0.6891, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:45<17:22, 21.72s/it][A

tensor(0.7141, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [13:07<17:02, 21.75s/it][A

tensor(0.6548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:29<16:43, 21.81s/it][A

tensor(0.7050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:51<16:24, 21.88s/it][A

tensor(0.6733, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:13<16:03, 21.89s/it][A

tensor(0.7252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:35<15:40, 21.88s/it][A

tensor(0.7008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:56<15:14, 21.77s/it][A

tensor(0.6922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:18<14:49, 21.68s/it][A

tensor(0.7279, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:39<14:24, 21.61s/it][A

tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [16:01<14:03, 21.62s/it][A

tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:22<13:42, 21.64s/it][A

tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:44<13:22, 21.69s/it][A

tensor(0.7036, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [17:06<13:01, 21.70s/it][A

tensor(0.7076, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [17:27<12:38, 21.66s/it][A

loss: tensor(0.6538, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:49<12:16, 21.66s/it][A

loss: tensor(0.6574, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [18:11<11:54, 21.66s/it][A

loss: tensor(0.6814, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [18:32<11:33, 21.67s/it][A

loss: tensor(0.6749, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:54<11:13, 21.74s/it][A

loss: tensor(0.6951, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [19:16<10:52, 21.76s/it][A

loss: tensor(0.6940, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [19:38<10:30, 21.73s/it][A

loss: tensor(0.6666, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [20:00<10:08, 21.73s/it][A

tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:21<09:45, 21.68s/it][A

tensor(0.6676, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:43<09:25, 21.74s/it][A

tensor(0.6502, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [21:05<09:03, 21.73s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [21:26<08:41, 21.74s/it][A

tensor(0.6864, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [21:48<08:19, 21.71s/it][A

loss: tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [22:10<07:58, 21.76s/it][A

tensor(0.6718, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [22:32<07:37, 21.78s/it][A

loss: tensor(0.6641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:53<07:14, 21.70s/it][A

tensor(0.7693, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:15<06:51, 21.63s/it][A

tensor(0.6919, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [23:36<06:27, 21.51s/it][A

loss: tensor(0.6636, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [23:57<06:04, 21.43s/it][A

loss: tensor(0.7402, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:19<05:42, 21.39s/it][A

tensor(0.6802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:40<05:22, 21.48s/it][A

tensor(0.7124, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [25:02<04:59, 21.42s/it][A

tensor(0.6911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:23<04:37, 21.35s/it][A

tensor(0.7402, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:44<04:16, 21.36s/it][A

tensor(0.6894, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [26:05<03:54, 21.34s/it][A

tensor(0.7506, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:27<03:33, 21.31s/it][A

tensor(0.7250, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:48<03:11, 21.33s/it][A

tensor(0.7218, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [27:09<02:50, 21.31s/it][A

loss: tensor(0.7259, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [27:31<02:29, 21.29s/it][A

loss: tensor(0.6636, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [27:52<02:07, 21.29s/it][A

loss: tensor(0.6985, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [28:13<01:46, 21.26s/it][A

loss: tensor(0.6866, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [28:34<01:25, 21.25s/it][A

loss: tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [28:55<01:03, 21.20s/it][A

loss: tensor(0.7056, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:16<00:42, 21.20s/it][A

tensor(0.6916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:38<00:21, 21.20s/it][A

tensor(0.6564, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:45<00:00, 21.51s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6662, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6899293438497797

	train acc: 0.6544872672508214

	training prec: 0.7990837863999263

	training rec: 0.6544872672508214

	training f1: 0.7094851106404492

	Current Learning rate:  9.142857142857144e-06



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.63s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.61s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.61s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.62s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.61s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:34<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.60s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6784008968444097

	Validation acc: 0.8154761904761905

	Validation prec: 0.8221980292654746

	Validation rec: 0.8154761904761905

	Validation f1: 0.8145719059745893
loss: 


  1%|          | 1/83 [00:20<28:36, 20.93s/it][A

tensor(0.6882, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:11, 20.89s/it][A

tensor(0.6747, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:53, 20.92s/it][A

tensor(0.7018, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:34, 20.94s/it][A

tensor(0.7214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:13, 20.95s/it][A

tensor(0.6995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:51, 20.93s/it][A

tensor(0.6986, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:30, 20.92s/it][A

tensor(0.6706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:08, 20.91s/it][A

tensor(0.6834, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:49, 20.95s/it][A

tensor(0.7055, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:27, 20.93s/it][A

tensor(0.6955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:11, 21.00s/it][A

tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:49, 20.97s/it][A

tensor(0.7091, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:28, 20.98s/it][A

tensor(0.7158, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:06, 20.96s/it][A

tensor(0.6743, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:43, 20.94s/it][A

tensor(0.6652, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:24, 20.96s/it][A

tensor(0.6865, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:02, 20.95s/it][A

tensor(0.6951, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:44, 20.99s/it][A

tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:25, 21.03s/it][A

tensor(0.6930, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<22:10, 21.12s/it][A

tensor(0.7055, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:49, 21.11s/it][A

tensor(0.6907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:27, 21.10s/it][A

tensor(0.6917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<21:01, 21.03s/it][A

tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:23<20:37, 20.98s/it][A

tensor(0.6898, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:44<20:14, 20.94s/it][A

tensor(0.6099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:05<19:50, 20.88s/it][A

tensor(0.7230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:26<19:30, 20.90s/it][A

tensor(0.7253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:47<19:12, 20.95s/it][A

tensor(0.7119, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:08<18:51, 20.96s/it][A

tensor(0.7329, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:29<18:31, 20.98s/it][A

tensor(0.7184, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:50<18:13, 21.02s/it][A

tensor(0.6940, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:11<17:53, 21.05s/it][A

tensor(0.7324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:32<17:34, 21.08s/it][A

tensor(0.7151, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:53<17:13, 21.09s/it][A

tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:52, 21.08s/it][A

tensor(0.6908, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:35<16:29, 21.04s/it][A

tensor(0.7028, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:56<16:05, 20.99s/it][A

tensor(0.6689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:17<15:43, 20.96s/it][A

tensor(0.6562, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:38<15:21, 20.94s/it][A

tensor(0.6640, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<15:00, 20.93s/it][A

tensor(0.6674, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<14:38, 20.92s/it][A

tensor(0.7027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:41<14:17, 20.92s/it][A

tensor(0.6934, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:01<13:56, 20.90s/it][A

tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:22<13:34, 20.89s/it][A

tensor(0.6960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:43<13:13, 20.88s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:04<12:51, 20.85s/it][A

tensor(0.6998, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:25<12:30, 20.85s/it][A

tensor(0.6404, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:46<12:09, 20.84s/it][A

tensor(0.6519, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:07<11:50, 20.90s/it][A

tensor(0.7084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:28<11:30, 20.93s/it][A

tensor(0.6960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:48<11:09, 20.92s/it][A

tensor(0.6671, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:09<10:48, 20.90s/it][A

tensor(0.7230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:30<10:27, 20.93s/it][A

tensor(0.6735, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:51<10:07, 20.94s/it][A

tensor(0.7006, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:12<09:47, 20.97s/it][A

tensor(0.7284, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:33<09:25, 20.95s/it][A

tensor(0.6927, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:54<09:05, 20.96s/it][A

tensor(0.7068, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:15<08:43, 20.95s/it][A

tensor(0.7253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:36<08:23, 21.00s/it][A

tensor(0.7153, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:57<08:04, 21.05s/it][A

tensor(0.6709, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:19<07:44, 21.10s/it][A

tensor(0.6818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:40<07:23, 21.10s/it][A

tensor(0.6715, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:01<07:00, 21.03s/it][A

tensor(0.7163, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:21<06:38, 20.95s/it][A

loss: tensor(0.6740, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:42<06:16, 20.90s/it][A

loss: tensor(0.6610, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [23:03<05:54, 20.84s/it][A

loss: tensor(0.6884, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:24<05:32, 20.81s/it][A

loss: tensor(0.7037, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:44<05:11, 20.76s/it][A

loss: tensor(0.7211, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [24:05<04:50, 20.72s/it][A

loss: tensor(0.6905, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:26<04:29, 20.70s/it][A

loss: tensor(0.7292, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:46<04:08, 20.72s/it][A

loss: tensor(0.7014, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [25:07<03:47, 20.72s/it][A

loss: tensor(0.6842, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:28<03:27, 20.74s/it][A

loss: tensor(0.6800, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:49<03:06, 20.74s/it][A

tensor(0.6968, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:09<02:46, 20.75s/it][A

tensor(0.6971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:30<02:25, 20.75s/it][A

tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:51<02:04, 20.77s/it][A

tensor(0.6873, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [27:12<01:43, 20.77s/it][A

loss: tensor(0.6803, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:33<01:23, 20.80s/it][A

loss: tensor(0.6518, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:53<01:02, 20.85s/it][A

loss: tensor(0.7037, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [28:14<00:41, 20.90s/it][A

loss: tensor(0.7064, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:35<00:20, 20.90s/it][A

loss: tensor(0.6706, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:43<00:00, 20.76s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6910, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.690463739705373

	train acc: 0.5858262595837898

	training prec: 0.8009672608380858

	training rec: 0.5858262595837898

	training f1: 0.6563676752050217

	Current Learning rate:  8.857142857142858e-06



  2%|▏         | 1/42 [00:02<01:51,  2.72s/it][A
  5%|▍         | 2/42 [00:05<01:49,  2.74s/it][A
  7%|▋         | 3/42 [00:08<01:46,  2.73s/it][A
 10%|▉         | 4/42 [00:10<01:43,  2.73s/it][A
 12%|█▏        | 5/42 [00:13<01:41,  2.75s/it][A
 14%|█▍        | 6/42 [00:16<01:39,  2.76s/it][A
 17%|█▋        | 7/42 [00:19<01:36,  2.75s/it][A
 19%|█▉        | 8/42 [00:22<01:34,  2.77s/it][A
 21%|██▏       | 9/42 [00:24<01:31,  2.78s/it][A
 24%|██▍       | 10/42 [00:27<01:28,  2.77s/it][A
 26%|██▌       | 11/42 [00:30<01:26,  2.79s/it][A
 29%|██▊       | 12/42 [00:33<01:23,  2.77s/it][A
 31%|███       | 13/42 [00:35<01:19,  2.74s/it][A
 33%|███▎      | 14/42 [00:38<01:16,  2.73s/it][A
 36%|███▌      | 15/42 [00:41<01:13,  2.73s/it][A
 38%|███▊      | 16/42 [00:43<01:10,  2.70s/it][A
 40%|████      | 17/42 [00:46<01:07,  2.70s/it][A
 43%|████▎     | 18/42 [00:49<01:04,  2.70s/it][A
 45%|████▌     | 19/42 [00:51<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:54<00:58,  2


	Validation loss: 0.6726480523745219

	Validation acc: 0.7872023809523809

	Validation prec: 0.8583566271465111

	Validation rec: 0.7872023809523809

	Validation f1: 0.8134554038550915



  1%|          | 1/83 [00:20<28:29, 20.85s/it][A

loss: tensor(0.7160, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:41<28:07, 20.84s/it][A

loss: tensor(0.6610, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:02<27:43, 20.79s/it][A

loss: tensor(0.7155, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:23<27:25, 20.83s/it][A

loss: tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:44<27:08, 20.88s/it][A

loss: tensor(0.7229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:51, 20.93s/it][A

tensor(0.6385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:35, 20.99s/it][A

tensor(0.7027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:14, 20.99s/it][A

tensor(0.6812, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:51, 20.96s/it][A

tensor(0.6843, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:30, 20.96s/it][A

tensor(0.6662, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:12, 21.00s/it][A

tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:51, 21.01s/it][A

tensor(0.6642, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:28, 20.98s/it][A

tensor(0.6576, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:08, 20.99s/it][A

tensor(0.7049, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:52, 21.07s/it][A

tensor(0.6940, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:28, 21.02s/it][A

tensor(0.6546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:07, 21.02s/it][A

tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:46, 21.02s/it][A

tensor(0.6593, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:24, 21.01s/it][A

tensor(0.6556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<22:03, 21.01s/it][A

tensor(0.6408, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:41, 20.99s/it][A

tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:19, 20.98s/it][A

tensor(0.6720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<20:59, 20.99s/it][A

tensor(0.6411, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:23<20:37, 20.97s/it][A

tensor(0.6957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:44<20:15, 20.95s/it][A

tensor(0.6693, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:05<19:54, 20.95s/it][A

tensor(0.6952, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:26<19:33, 20.96s/it][A

tensor(0.6941, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:47<19:13, 20.97s/it][A

tensor(0.6649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:08<18:50, 20.94s/it][A

tensor(0.6859, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:29<18:30, 20.95s/it][A

tensor(0.7324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:49<18:08, 20.94s/it][A

tensor(0.7109, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:10<17:47, 20.93s/it][A

tensor(0.7163, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:25, 20.91s/it][A

tensor(0.6652, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:52<17:04, 20.91s/it][A

tensor(0.6247, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:13<16:42, 20.90s/it][A

tensor(0.7043, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:34<16:22, 20.90s/it][A

tensor(0.6757, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:55<16:01, 20.90s/it][A

tensor(0.7060, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:16<15:40, 20.90s/it][A

tensor(0.6951, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:21, 20.95s/it][A

tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:58<15:02, 20.98s/it][A

tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:19<14:42, 21.01s/it][A

tensor(0.6641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:40<14:20, 20.98s/it][A

tensor(0.6908, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:01<13:59, 20.99s/it][A

tensor(0.6984, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:22<13:38, 20.99s/it][A

tensor(0.6683, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:43<13:16, 20.97s/it][A

tensor(0.6701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:04<12:56, 20.98s/it][A

tensor(0.7177, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:25<12:34, 20.96s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:46<12:13, 20.96s/it][A

tensor(0.6816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:06<11:51, 20.94s/it][A

tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:27<11:31, 20.94s/it][A

tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:48<11:10, 20.96s/it][A

tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:09<10:48, 20.92s/it][A

tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:30<10:27, 20.93s/it][A

tensor(0.7046, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:51<10:07, 20.96s/it][A

tensor(0.6678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:12<09:47, 20.97s/it][A

tensor(0.7149, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:33<09:25, 20.94s/it][A

tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:54<09:04, 20.93s/it][A

tensor(0.6960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:15<08:44, 20.96s/it][A

tensor(0.7038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:36<08:23, 20.98s/it][A

tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:57<08:02, 20.99s/it][A

tensor(0.6900, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:18<07:41, 20.98s/it][A

tensor(0.6320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:39<07:21, 21.00s/it][A

tensor(0.6894, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [22:00<06:59, 20.96s/it][A

loss: tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:21<06:37, 20.95s/it][A

loss: tensor(0.6450, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:42<06:17, 20.96s/it][A

loss: tensor(0.7080, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [23:03<05:56, 20.99s/it][A

loss: tensor(0.6736, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:24<05:35, 20.97s/it][A

tensor(0.7157, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:45<05:14, 20.98s/it][A

tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:06<04:53, 20.94s/it][A

tensor(0.7031, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:27<04:32, 20.93s/it][A

tensor(0.7201, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:47<04:10, 20.91s/it][A

tensor(0.7045, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:08<03:50, 20.94s/it][A

tensor(0.6728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:29<03:29, 20.94s/it][A

tensor(0.7143, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:50<03:08, 20.90s/it][A

tensor(0.6973, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:11<02:46, 20.87s/it][A

tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:32<02:26, 20.88s/it][A

tensor(0.7197, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:53<02:05, 20.89s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:14<01:44, 20.83s/it][A

tensor(0.6625, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:34<01:23, 20.80s/it][A

loss: tensor(0.6843, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:55<01:02, 20.82s/it][A

loss: tensor(0.6568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:16<00:41, 20.87s/it][A

tensor(0.6930, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:37<00:20, 20.88s/it][A

tensor(0.6791, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:44<00:00, 20.78s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6484, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6816716179790268

	train acc: 0.6398206462212486

	training prec: 0.8059745001433829

	training rec: 0.6398206462212486

	training f1: 0.6988199291157808

	Current Learning rate:  8.571428571428571e-06



  2%|▏         | 1/42 [00:02<01:47,  2.62s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.60s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.60s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.61s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:20<01:29,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.66s/it][A
 26%|██▌       | 11/42 [00:28<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.63s/it][A
 40%|████      | 17/42 [00:44<01:04,  2.60s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6695798110394251

	Validation acc: 0.7817460317460319

	Validation prec: 0.8520302436106403

	Validation rec: 0.7817460317460319

	Validation f1: 0.8060613194967859
loss: 


  1%|          | 1/83 [00:21<28:47, 21.07s/it][A

tensor(0.6993, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:22, 21.02s/it][A

tensor(0.6582, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:02, 21.03s/it][A

tensor(0.6456, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:37, 20.99s/it][A

tensor(0.6426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:17, 20.99s/it][A

tensor(0.6553, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<26:56, 21.00s/it][A

tensor(0.6708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:33, 20.96s/it][A

tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:10, 20.95s/it][A

tensor(0.6446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:51, 20.97s/it][A

tensor(0.6662, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:29, 20.96s/it][A

tensor(0.6546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:09, 20.97s/it][A

tensor(0.7006, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:50, 20.99s/it][A

tensor(0.6678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:27, 20.97s/it][A

tensor(0.6705, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:08, 20.99s/it][A

tensor(0.6854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:47, 21.00s/it][A

tensor(0.6575, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:28, 21.02s/it][A

tensor(0.7305, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:04, 20.98s/it][A

tensor(0.6314, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:41, 20.95s/it][A

tensor(0.6954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:20, 20.94s/it][A

tensor(0.7033, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<21:55, 20.89s/it][A

tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:33, 20.87s/it][A

tensor(0.6831, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:16, 20.92s/it][A

tensor(0.6804, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<20:54, 20.91s/it][A

tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:33, 20.90s/it][A

tensor(0.6620, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:43<20:12, 20.91s/it][A

tensor(0.6689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<19:51, 20.90s/it][A

tensor(0.6448, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:31, 20.93s/it][A

tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:09, 20.89s/it][A

tensor(0.7057, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<18:49, 20.91s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:28<18:30, 20.95s/it][A

tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:49<18:07, 20.92s/it][A

tensor(0.7067, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:10<17:45, 20.89s/it][A

tensor(0.6385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:27, 20.94s/it][A

tensor(0.6943, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:52<17:08, 20.98s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:13<16:49, 21.03s/it][A

tensor(0.7063, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:34<16:29, 21.06s/it][A

tensor(0.7014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:55<16:09, 21.08s/it][A

tensor(0.6618, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:16<15:48, 21.07s/it][A

tensor(0.7221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:27, 21.07s/it][A

tensor(0.6701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<15:07, 21.10s/it][A

tensor(0.6849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<14:47, 21.13s/it][A

tensor(0.6619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:41<14:26, 21.14s/it][A

tensor(0.6753, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:02<14:06, 21.16s/it][A

tensor(0.6631, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:23<13:45, 21.16s/it][A

tensor(0.6600, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:44<13:23, 21.14s/it][A

tensor(0.7364, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:05<13:00, 21.09s/it][A

tensor(0.6357, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:38, 21.08s/it][A

tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:47<12:15, 21.03s/it][A

tensor(0.6659, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:08<11:54, 21.02s/it][A

tensor(0.7097, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:29<11:32, 21.00s/it][A

tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:50<11:11, 20.97s/it][A

tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:11<10:49, 20.97s/it][A

tensor(0.6978, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:32<10:29, 20.99s/it][A

tensor(0.7280, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:53<10:09, 21.00s/it][A

tensor(0.6814, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:14<09:48, 21.02s/it][A

tensor(0.7109, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:35<09:27, 21.04s/it][A

loss: tensor(0.6985, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:56<09:06, 21.02s/it][A

loss: tensor(0.6811, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:17<08:45, 21.00s/it][A

loss: tensor(0.6722, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:38<08:23, 20.99s/it][A

loss: tensor(0.6596, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:59<08:02, 21.00s/it][A

loss: tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:20<07:43, 21.06s/it][A

tensor(0.7401, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:42<07:22, 21.09s/it][A

tensor(0.6799, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:03<07:02, 21.13s/it][A

tensor(0.7193, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:24<06:40, 21.09s/it][A

tensor(0.7121, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:45<06:18, 21.04s/it][A

tensor(0.6473, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:06<05:57, 21.03s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:27<05:36, 21.00s/it][A

tensor(0.6896, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:48<05:16, 21.07s/it][A

tensor(0.7181, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:09<04:55, 21.08s/it][A

tensor(0.6849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:30<04:33, 21.07s/it][A

tensor(0.6995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:51<04:13, 21.12s/it][A

tensor(0.7001, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:12<03:52, 21.12s/it][A

tensor(0.6580, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:33<03:30, 21.07s/it][A

tensor(0.6987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:54<03:09, 21.05s/it][A

tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:16<02:48, 21.07s/it][A

tensor(0.6932, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:37<02:27, 21.06s/it][A

tensor(0.7030, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:58<02:06, 21.04s/it][A

tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [27:18<01:44, 21.00s/it][A

loss: tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:40<01:24, 21.05s/it][A

tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:01<01:03, 21.03s/it][A

tensor(0.6726, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [28:22<00:41, 20.99s/it][A

loss: tensor(0.6670, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:42<00:20, 20.98s/it][A

tensor(0.6594, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:50<00:00, 20.85s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6772, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6790041528552412

	train acc: 0.6522453450164293

	training prec: 0.8047199825099787

	training rec: 0.6522453450164293

	training f1: 0.7072294055144601

	Current Learning rate:  8.285714285714287e-06



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.68s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.65s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.66s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.67s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.66s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.66s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.67s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.65s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.65s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6676688194274902

	Validation acc: 0.619295634920635

	Validation prec: 0.8747781293288845

	Validation rec: 0.619295634920635

	Validation f1: 0.685436387404309
loss: 


  1%|          | 1/83 [00:21<28:59, 21.22s/it][A

tensor(0.6441, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:30, 21.12s/it][A

tensor(0.6560, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:06, 21.08s/it][A

tensor(0.6995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:40, 21.01s/it][A

tensor(0.7105, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:14, 20.96s/it][A

tensor(0.6977, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:51, 20.92s/it][A

tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:28, 20.90s/it][A

tensor(0.7076, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:08, 20.91s/it][A

tensor(0.6305, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:53, 20.99s/it][A

tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:30<25:39, 21.09s/it][A

tensor(0.6971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:51<25:20, 21.12s/it][A

tensor(0.6775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:12<25:02, 21.16s/it][A

tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:33<24:39, 21.14s/it][A

tensor(0.6729, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:54<24:16, 21.11s/it][A

tensor(0.6699, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:15<23:52, 21.06s/it][A

tensor(0.6285, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:36<23:26, 21.00s/it][A

tensor(0.7119, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:57<23:02, 20.95s/it][A

tensor(0.6274, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:18<22:38, 20.90s/it][A

tensor(0.6694, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:39<22:17, 20.90s/it][A

tensor(0.7235, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<21:56, 20.90s/it][A

tensor(0.6812, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:35, 20.89s/it][A

tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:11, 20.85s/it][A

tensor(0.6969, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<20:49, 20.83s/it][A

tensor(0.6809, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:23<20:28, 20.83s/it][A

tensor(0.6824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:44<20:07, 20.83s/it][A

tensor(0.7176, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<19:47, 20.84s/it][A

tensor(0.7189, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:26, 20.82s/it][A

tensor(0.6817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:05, 20.83s/it][A

tensor(0.6596, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<18:46, 20.85s/it][A

tensor(0.6847, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:28<18:25, 20.86s/it][A

tensor(0.6531, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:49<18:06, 20.89s/it][A

tensor(0.6803, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:10<17:47, 20.93s/it][A

tensor(0.6709, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:26, 20.92s/it][A

tensor(0.6981, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:52<17:06, 20.95s/it][A

tensor(0.6800, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:13<16:46, 20.96s/it][A

tensor(0.6824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:34<16:28, 21.02s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:55<16:05, 21.00s/it][A

tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:16<15:46, 21.03s/it][A

tensor(0.6735, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:24, 21.02s/it][A

tensor(0.6246, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:58<15:02, 21.00s/it][A

tensor(0.7231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:19<14:42, 21.01s/it][A

tensor(0.6816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:40<14:21, 21.02s/it][A

tensor(0.6096, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:01<14:00, 21.00s/it][A

tensor(0.6917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:22<13:39, 21.01s/it][A

tensor(0.7198, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:43<13:18, 21.01s/it][A

tensor(0.6881, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:04<12:57, 21.02s/it][A

tensor(0.6890, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:25<12:36, 21.02s/it][A

tensor(0.6862, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:46<12:14, 20.99s/it][A

tensor(0.7074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:07<11:53, 20.98s/it][A

tensor(0.6741, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:28<11:32, 20.97s/it][A

tensor(0.6359, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:49<11:12, 21.02s/it][A

tensor(0.6661, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:10<10:51, 21.02s/it][A

tensor(0.6524, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:31<10:31, 21.04s/it][A

tensor(0.6840, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:52<10:09, 21.01s/it][A

loss: tensor(0.6448, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:13<09:47, 20.99s/it][A

loss: tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:34<09:25, 20.95s/it][A

loss: tensor(0.6973, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:55<09:04, 20.96s/it][A

loss: tensor(0.6741, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:16<08:44, 20.98s/it][A

loss: tensor(0.7095, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:37<08:24, 21.01s/it][A

loss: tensor(0.7027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:58<08:03, 21.01s/it][A

tensor(0.6614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:19<07:41, 21.00s/it][A

tensor(0.7099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:40<07:20, 20.97s/it][A

tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:01<07:00, 21.03s/it][A

tensor(0.6386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:22<06:39, 21.01s/it][A

tensor(0.6956, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:43<06:18, 21.03s/it][A

tensor(0.6432, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:04<05:57, 21.01s/it][A

tensor(0.6562, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:25<05:36, 21.05s/it][A

tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:46<05:15, 21.04s/it][A

tensor(0.7091, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:07<04:54, 21.05s/it][A

tensor(0.6724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:28<04:33, 21.02s/it][A

tensor(0.6849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:49<04:12, 21.02s/it][A

tensor(0.6862, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:10<03:51, 21.00s/it][A

tensor(0.6434, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:31<03:29, 20.99s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:52<03:09, 21.01s/it][A

tensor(0.6319, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:13<02:48, 21.03s/it][A

tensor(0.6629, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:34<02:27, 21.03s/it][A

tensor(0.6541, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [26:55<02:06, 21.04s/it][A

loss: tensor(0.6951, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [27:16<01:44, 21.00s/it][A

loss: tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:37<01:23, 20.98s/it][A

tensor(0.7050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:58<01:02, 20.93s/it][A

tensor(0.6784, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:19<00:41, 20.91s/it][A

tensor(0.7105, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:40<00:20, 20.93s/it][A

tensor(0.6911, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:47<00:00, 20.81s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6134, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6764605691634029

	train acc: 0.5784330503833516

	training prec: 0.8183133500474729

	training rec: 0.5784330503833516

	training f1: 0.6509971033658005

	Current Learning rate:  8.000000000000001e-06



  2%|▏         | 1/42 [00:02<01:45,  2.58s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.62s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:20<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.62s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.62s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.659945798771722

	Validation acc: 0.7619047619047619

	Validation prec: 0.8541820922067906

	Validation rec: 0.7619047619047619

	Validation f1: 0.7945220194528747
loss: 


  1%|          | 1/83 [00:20<28:41, 20.99s/it][A

tensor(0.7003, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:21, 21.00s/it][A

tensor(0.6316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:02, 21.03s/it][A

tensor(0.7356, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:37, 20.98s/it][A

tensor(0.6540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:17, 20.99s/it][A

tensor(0.6946, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:53, 20.96s/it][A

tensor(0.6511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:31, 20.94s/it][A

tensor(0.6427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:08, 20.91s/it][A

tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:47, 20.92s/it][A

tensor(0.6942, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:27, 20.92s/it][A

tensor(0.6680, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:06, 20.93s/it][A

tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:45, 20.92s/it][A

tensor(0.6532, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:28, 20.98s/it][A

tensor(0.6497, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:05, 20.95s/it][A

tensor(0.6524, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:48, 21.01s/it][A

tensor(0.6695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:24, 20.96s/it][A

tensor(0.6498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:07, 21.02s/it][A

tensor(0.6679, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:48, 21.05s/it][A

tensor(0.6783, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:31, 21.12s/it][A

tensor(0.6966, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<22:08, 21.08s/it][A

tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:44, 21.04s/it][A

tensor(0.6461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:21, 21.01s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<21:03, 21.05s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:23<20:41, 21.05s/it][A

tensor(0.7241, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:45<20:21, 21.07s/it][A

tensor(0.6822, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:06<20:00, 21.06s/it][A

tensor(0.6953, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:27<19:40, 21.08s/it][A

tensor(0.7201, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:48<19:19, 21.08s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:09<18:56, 21.05s/it][A

tensor(0.6685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:30<18:35, 21.06s/it][A

tensor(0.7140, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:51<18:15, 21.07s/it][A

tensor(0.6693, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:12<17:53, 21.06s/it][A

tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:33<17:33, 21.07s/it][A

tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:54<17:11, 21.05s/it][A

loss: tensor(0.6269, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:15<16:50, 21.06s/it][A

loss: tensor(0.6770, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:36<16:27, 21.01s/it][A

loss: tensor(0.6992, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:57<16:04, 20.97s/it][A

loss: tensor(0.6915, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:18<15:41, 20.93s/it][A

loss: tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:39<15:20, 20.91s/it][A

loss: tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [14:00<14:58, 20.89s/it][A

loss: tensor(0.6495, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:20<14:36, 20.86s/it][A

loss: tensor(0.6725, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:41<14:16, 20.89s/it][A

loss: tensor(0.7264, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [15:02<13:54, 20.86s/it][A

loss: tensor(0.6607, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:23<13:33, 20.85s/it][A

loss: tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:44<13:11, 20.83s/it][A

loss: tensor(0.6747, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [16:05<12:51, 20.86s/it][A

loss: tensor(0.7222, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:25<12:30, 20.85s/it][A

loss: tensor(0.6332, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:46<12:08, 20.80s/it][A

loss: tensor(0.6791, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:07<11:47, 20.81s/it][A

loss: tensor(0.6568, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:28<11:27, 20.82s/it][A

loss: tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:49<11:08, 20.88s/it][A

loss: tensor(0.7081, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:10<10:48, 20.92s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:31<10:27, 20.92s/it][A

tensor(0.6864, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:52<10:06, 20.93s/it][A

tensor(0.6501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:12<09:44, 20.89s/it][A

tensor(0.6810, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:33<09:24, 20.89s/it][A

tensor(0.6430, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:54<09:04, 20.95s/it][A

tensor(0.6966, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:16<08:45, 21.00s/it][A

tensor(0.6629, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:37<08:23, 20.99s/it][A

tensor(0.6931, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:57<08:02, 20.96s/it][A

tensor(0.6864, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:18<07:41, 20.96s/it][A

tensor(0.6552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:39<07:19, 20.94s/it][A

tensor(0.6691, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:00<06:58, 20.92s/it][A

tensor(0.6251, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:21<06:38, 20.95s/it][A

tensor(0.6758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:42<06:17, 20.95s/it][A

tensor(0.6723, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:03<05:55, 20.93s/it][A

tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:24<05:34, 20.90s/it][A

tensor(0.7060, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:45<05:13, 20.89s/it][A

tensor(0.6624, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:06<04:52, 20.90s/it][A

tensor(0.6580, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:27<04:32, 20.93s/it][A

tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:48<04:10, 20.92s/it][A

tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:08<03:49, 20.91s/it][A

tensor(0.6999, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:29<03:28, 20.89s/it][A

tensor(0.6103, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:50<03:07, 20.86s/it][A

tensor(0.6965, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:11<02:46, 20.86s/it][A

tensor(0.6945, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [26:32<02:25, 20.83s/it][A

loss: tensor(0.6751, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [26:52<02:04, 20.80s/it][A

loss: tensor(0.6777, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [27:13<01:43, 20.79s/it][A

loss: tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:34<01:23, 20.78s/it][A

loss: tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:55<01:02, 20.81s/it][A

loss: 


 98%|█████████▊| 81/83 [28:16<00:41, 20.84s/it][A

tensor(0.6658, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:37<00:20, 20.88s/it][A

tensor(0.6531, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:44<00:00, 20.78s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7068, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6719135383525526

	train acc: 0.6610932365826944

	training prec: 0.8153684123030611

	training rec: 0.6610932365826944

	training f1: 0.715908986493497

	Current Learning rate:  7.714285714285716e-06



  2%|▏         | 1/42 [00:02<01:49,  2.67s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.63s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.64s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.64s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.65s/it][A
 33%|███▎      | 14/42 [00:36<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.65s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.66s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.6574985470090594

	Validation acc: 0.7217261904761905

	Validation prec: 0.8542282195682137

	Validation rec: 0.7217261904761905

	Validation f1: 0.7676804946411048
loss: 


  1%|          | 1/83 [00:21<28:56, 21.18s/it][A

tensor(0.6456, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:37, 21.20s/it][A

tensor(0.7127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:18, 21.23s/it][A

tensor(0.6733, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:58, 21.25s/it][A

tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:46<27:35, 21.22s/it][A

tensor(0.6874, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:07<27:12, 21.20s/it][A

tensor(0.6757, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:28<26:50, 21.19s/it][A

tensor(0.6759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:49<26:30, 21.20s/it][A

tensor(0.7047, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<26:10, 21.23s/it][A

tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:32<25:48, 21.21s/it][A

tensor(0.6815, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:53<25:27, 21.22s/it][A

tensor(0.6802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:14<25:06, 21.22s/it][A

tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:35<24:43, 21.20s/it][A

tensor(0.6313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:56<24:22, 21.19s/it][A

tensor(0.6465, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:18<23:59, 21.16s/it][A

tensor(0.6403, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:39<23:40, 21.21s/it][A

tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:00<23:18, 21.19s/it][A

tensor(0.6856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:21<22:56, 21.17s/it][A

tensor(0.6964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:42<22:33, 21.14s/it][A

tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:03<22:10, 21.12s/it][A

tensor(0.6720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:24<21:51, 21.16s/it][A

tensor(0.7030, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:46<21:31, 21.17s/it][A

tensor(0.6617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:07<21:10, 21.17s/it][A

tensor(0.6892, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:28<20:48, 21.15s/it][A

tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:49<20:29, 21.21s/it][A

tensor(0.7208, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:10<20:05, 21.15s/it][A

tensor(0.6771, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:31<19:41, 21.10s/it][A

tensor(0.7005, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:53<19:22, 21.13s/it][A

tensor(0.6772, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:14<19:02, 21.16s/it][A

tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:35<18:42, 21.18s/it][A

tensor(0.6792, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:56<18:19, 21.14s/it][A

loss: tensor(0.6291, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:17<17:58, 21.15s/it][A

loss: tensor(0.7355, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:38<17:35, 21.11s/it][A

loss: tensor(0.6601, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:59<17:15, 21.13s/it][A

loss: tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:20<16:52, 21.10s/it][A

loss: tensor(0.6447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:42<16:33, 21.13s/it][A

tensor(0.5977, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:03<16:12, 21.14s/it][A

tensor(0.5904, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:24<15:52, 21.18s/it][A

tensor(0.6416, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:45<15:32, 21.18s/it][A

tensor(0.7138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:06<15:11, 21.20s/it][A

tensor(0.6692, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:28<14:50, 21.19s/it][A

tensor(0.6641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:49<14:30, 21.22s/it][A

tensor(0.6775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:10<14:09, 21.23s/it][A

tensor(0.6774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:31<13:46, 21.20s/it][A

tensor(0.7724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:53<13:25, 21.20s/it][A

tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:14<13:04, 21.21s/it][A

tensor(0.6332, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:35<12:44, 21.23s/it][A

tensor(0.6905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:56<12:23, 21.25s/it][A

tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:18<12:01, 21.23s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:39<11:39, 21.20s/it][A

tensor(0.6774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:00<11:17, 21.16s/it][A

tensor(0.7004, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:21<10:55, 21.16s/it][A

tensor(0.6529, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:42<10:34, 21.17s/it][A

tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:03<10:12, 21.13s/it][A

tensor(0.7167, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:24<09:51, 21.13s/it][A

tensor(0.6606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:45<09:31, 21.15s/it][A

tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:06<09:09, 21.13s/it][A

tensor(0.6947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:28<08:48, 21.14s/it][A

tensor(0.6719, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:49<08:26, 21.12s/it][A

tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:10<08:05, 21.10s/it][A

tensor(0.6540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:31<07:44, 21.10s/it][A

tensor(0.6694, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:52<07:23, 21.10s/it][A

tensor(0.7092, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:13<07:01, 21.10s/it][A

tensor(0.6948, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:34<06:41, 21.11s/it][A

tensor(0.6669, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:55<06:19, 21.11s/it][A

tensor(0.6736, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:17<05:59, 21.14s/it][A

tensor(0.6744, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:38<05:38, 21.13s/it][A

tensor(0.6936, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:59<05:16, 21.11s/it][A

tensor(0.6781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:20<04:55, 21.10s/it][A

tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:41<04:34, 21.09s/it][A

tensor(0.6727, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:02<04:13, 21.08s/it][A

tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:23<03:52, 21.13s/it][A

tensor(0.6234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:44<03:31, 21.16s/it][A

tensor(0.6329, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [26:05<03:09, 21.10s/it][A

loss: tensor(0.6922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:27<02:49, 21.14s/it][A

tensor(0.6580, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [26:48<02:27, 21.11s/it][A

loss: tensor(0.6754, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [27:09<02:06, 21.09s/it][A

loss: tensor(0.6478, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:30<01:45, 21.12s/it][A

tensor(0.6453, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:51<01:24, 21.11s/it][A

tensor(0.6538, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:12<01:03, 21.06s/it][A

tensor(0.6703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:33<00:41, 20.96s/it][A

tensor(0.6836, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:53<00:20, 20.94s/it][A

tensor(0.6243, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:01<00:00, 20.98s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6315, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6695390049233494

	train acc: 0.6786349945235488

	training prec: 0.8206209744391856

	training rec: 0.6786349945235488

	training f1: 0.7288733183340579

	Current Learning rate:  7.428571428571429e-06



  2%|▏         | 1/42 [00:02<01:48,  2.65s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.61s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.61s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.61s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.61s/it][A
 19%|█▉        | 8/42 [00:20<01:29,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.61s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.61s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.62s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.61s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6562137234778631

	Validation acc: 0.6607142857142857

	Validation prec: 0.8753193859966133

	Validation rec: 0.6607142857142857

	Validation f1: 0.7221571643082024
loss: 


  1%|          | 1/83 [00:20<28:32, 20.88s/it][A

tensor(0.6816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:13, 20.91s/it][A

tensor(0.6763, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:58, 20.98s/it][A

tensor(0.7027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:44, 21.07s/it][A

tensor(0.6700, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:22, 21.06s/it][A

tensor(0.6922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:01, 21.06s/it][A

tensor(0.6628, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:42, 21.09s/it][A

tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:17, 21.04s/it][A

tensor(0.6810, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:09<25:54, 21.01s/it][A

loss: tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:30<25:29, 20.96s/it][A

loss: tensor(0.6659, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:50<25:05, 20.91s/it][A

loss: tensor(0.6629, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:11<24:41, 20.87s/it][A

loss: tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:32<24:21, 20.87s/it][A

loss: tensor(0.6953, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:53<23:58, 20.84s/it][A

loss: tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:14<23:37, 20.84s/it][A

loss: tensor(0.6580, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:35<23:17, 20.86s/it][A

loss: tensor(0.6644, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<22:56, 20.86s/it][A

tensor(0.6624, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:38, 20.90s/it][A

tensor(0.6399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:17, 20.90s/it][A

tensor(0.6571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<21:57, 20.92s/it][A

tensor(0.6501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:19<21:37, 20.92s/it][A

tensor(0.6506, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:40<21:15, 20.91s/it][A

tensor(0.6558, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:01<20:52, 20.88s/it][A

tensor(0.6651, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:32, 20.89s/it][A

tensor(0.6475, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:43<20:14, 20.95s/it][A

tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<19:58, 21.02s/it][A

tensor(0.6774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:37, 21.03s/it][A

tensor(0.6760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:17, 21.05s/it][A

tensor(0.6556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<18:53, 21.00s/it][A

tensor(0.6831, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:28<18:33, 21.00s/it][A

tensor(0.6545, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:49<18:12, 21.02s/it][A

tensor(0.6570, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:10<17:52, 21.03s/it][A

tensor(0.6815, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:32, 21.05s/it][A

tensor(0.6447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:52<17:12, 21.06s/it][A

tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:13<16:49, 21.04s/it][A

tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:34<16:29, 21.06s/it][A

tensor(0.7007, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:56<16:08, 21.06s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:17<15:47, 21.06s/it][A

tensor(0.6680, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:38<15:26, 21.07s/it][A

tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<15:06, 21.07s/it][A

tensor(0.6551, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<14:44, 21.07s/it][A

tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:41<14:23, 21.05s/it][A

tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:02<14:02, 21.07s/it][A

tensor(0.6804, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:23<13:39, 21.01s/it][A

tensor(0.6942, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:44<13:18, 21.00s/it][A

tensor(0.7223, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:05<12:56, 20.98s/it][A

tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:33, 20.94s/it][A

tensor(0.6562, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:46<12:12, 20.92s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:07<11:51, 20.94s/it][A

tensor(0.6492, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:28<11:30, 20.91s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:49<11:11, 20.97s/it][A

tensor(0.6929, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:10<10:49, 20.96s/it][A

tensor(0.6594, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:31<10:28, 20.96s/it][A

tensor(0.6573, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:52<10:07, 20.95s/it][A

tensor(0.7150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:13<09:47, 20.97s/it][A

tensor(0.6394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:34<09:25, 20.94s/it][A

tensor(0.6678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:55<09:03, 20.92s/it][A

tensor(0.6435, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:16<08:42, 20.90s/it][A

tensor(0.6597, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:37<08:21, 20.92s/it][A

tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:58<08:01, 20.92s/it][A

tensor(0.6400, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:19<07:40, 20.92s/it][A

tensor(0.6490, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:40<07:20, 20.95s/it][A

tensor(0.6562, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:01<06:58, 20.93s/it][A

tensor(0.6641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:22<06:38, 20.97s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:43<06:18, 21.02s/it][A

tensor(0.6439, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [23:04<05:56, 20.99s/it][A

loss: tensor(0.7009, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:24<05:35, 20.95s/it][A

loss: tensor(0.7060, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:45<05:13, 20.91s/it][A

loss: tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [24:06<04:52, 20.89s/it][A

loss: tensor(0.6385, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:27<04:31, 20.92s/it][A

loss: tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:48<04:11, 20.99s/it][A

loss: tensor(0.6974, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [25:09<03:51, 21.05s/it][A

loss: tensor(0.6551, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:31<03:30, 21.09s/it][A

loss: tensor(0.6522, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:52<03:09, 21.08s/it][A

loss: tensor(0.7025, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:13<02:49, 21.13s/it][A

tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [26:34<02:27, 21.13s/it][A

loss: tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:55<02:06, 21.04s/it][A

tensor(0.6203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:16<01:44, 21.00s/it][A

tensor(0.6723, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:37<01:23, 20.98s/it][A

tensor(0.6985, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:58<01:02, 20.94s/it][A

tensor(0.6914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:19<00:42, 21.02s/it][A

tensor(0.6470, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:40<00:21, 21.06s/it][A

tensor(0.6716, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:47<00:00, 20.82s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6619, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.665373071848628

	train acc: 0.6148343373493976

	training prec: 0.8293885458099203

	training rec: 0.6148343373493976

	training f1: 0.6792833377624979

	Current Learning rate:  7.1428571428571436e-06



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.62s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.65s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.64s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6518053213755289

	Validation acc: 0.6862599206349207

	Validation prec: 0.8660247825039326

	Validation rec: 0.6862599206349207

	Validation f1: 0.7422072058462994
loss: 


  1%|          | 1/83 [00:20<28:20, 20.73s/it][A

tensor(0.6960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:09, 20.86s/it][A

tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:53, 20.92s/it][A

tensor(0.6615, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:36, 20.97s/it][A

tensor(0.6826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:12, 20.93s/it][A

tensor(0.7072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:52, 20.94s/it][A

tensor(0.6419, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:35, 20.99s/it][A

tensor(0.6926, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:11, 20.96s/it][A

tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:53, 20.99s/it][A

tensor(0.6429, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:33, 21.01s/it][A

tensor(0.6227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:15, 21.05s/it][A

tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:52, 21.03s/it][A

tensor(0.6436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:30, 21.00s/it][A

tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:09, 21.00s/it][A

tensor(0.6643, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:48, 21.01s/it][A

tensor(0.6933, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:28, 21.02s/it][A

tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:08, 21.03s/it][A

tensor(0.7183, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:44, 20.99s/it][A

tensor(0.6541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:20, 20.95s/it][A

tensor(0.6688, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<21:56, 20.90s/it][A

tensor(0.7051, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:36, 20.90s/it][A

tensor(0.6796, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:13, 20.88s/it][A

tensor(0.6728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:01<20:51, 20.85s/it][A

tensor(0.6611, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:31, 20.87s/it][A

tensor(0.6672, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:43<20:12, 20.90s/it][A

tensor(0.6802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<19:50, 20.88s/it][A

tensor(0.7281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:32, 20.93s/it][A

tensor(0.6285, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:10, 20.91s/it][A

tensor(0.6410, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<18:48, 20.89s/it][A

tensor(0.7002, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:28<18:29, 20.93s/it][A

tensor(0.6638, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:49<18:10, 20.98s/it][A

tensor(0.6548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:10<17:46, 20.90s/it][A

tensor(0.6648, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:23, 20.87s/it][A

tensor(0.6682, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:51<17:01, 20.85s/it][A

tensor(0.6722, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:12<16:39, 20.83s/it][A

tensor(0.6617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:33<16:24, 20.94s/it][A

tensor(0.6786, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:54<16:04, 20.97s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:15<15:44, 20.99s/it][A

tensor(0.7224, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:26, 21.06s/it][A

tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:58<15:05, 21.06s/it][A

tensor(0.7068, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:19<14:43, 21.04s/it][A

tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:40<14:20, 20.99s/it][A

tensor(0.7160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:01<14:00, 21.01s/it][A

tensor(0.6614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:22<13:39, 21.02s/it][A

tensor(0.6946, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:43<13:18, 21.02s/it][A

tensor(0.6669, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:04<12:58, 21.03s/it][A

tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:25<12:38, 21.08s/it][A

tensor(0.6951, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:46<12:15, 21.01s/it][A

loss: tensor(0.6561, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:07<11:52, 20.95s/it][A

loss: tensor(0.6479, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:27<11:29, 20.89s/it][A

loss: tensor(0.6364, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:48<11:07, 20.86s/it][A

loss: tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:09<10:46, 20.87s/it][A

loss: tensor(0.6316, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:30<10:25, 20.84s/it][A

loss: tensor(0.6685, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:51<10:04, 20.83s/it][A

loss: tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:11<09:43, 20.84s/it][A

loss: tensor(0.6452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:32<09:22, 20.85s/it][A

tensor(0.6395, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:53<09:03, 20.91s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:15<08:44, 21.00s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:36<08:24, 21.04s/it][A

tensor(0.6698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:57<08:03, 21.00s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:18<07:41, 20.97s/it][A

tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:38<07:20, 20.97s/it][A

tensor(0.6355, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:59<06:59, 20.96s/it][A

tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:20<06:38, 20.98s/it][A

tensor(0.6629, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:41<06:17, 20.99s/it][A

tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:02<05:55, 20.93s/it][A

tensor(0.6566, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:23<05:34, 20.92s/it][A

tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:44<05:13, 20.93s/it][A

tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:05<04:52, 20.89s/it][A

tensor(0.6790, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:26<04:30, 20.84s/it][A

tensor(0.6814, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:46<04:09, 20.80s/it][A

tensor(0.6372, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [25:07<03:48, 20.77s/it][A

loss: tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:28<03:27, 20.72s/it][A

loss: tensor(0.7052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:48<03:06, 20.69s/it][A

tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [26:09<02:45, 20.68s/it][A

loss: tensor(0.6746, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [26:30<02:24, 20.67s/it][A

loss: tensor(0.6778, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:50<02:04, 20.73s/it][A

tensor(0.6834, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:11<01:43, 20.78s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:32<01:23, 20.82s/it][A

tensor(0.6452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:53<01:02, 20.85s/it][A

tensor(0.6542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:14<00:41, 20.88s/it][A

tensor(0.6492, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:35<00:20, 20.89s/it][A

tensor(0.6671, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:42<00:00, 20.75s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6656246379197363

	train acc: 0.6404367469879518

	training prec: 0.8293328715286075

	training rec: 0.6404367469879518

	training f1: 0.7017295852095152

	Current Learning rate:  6.857142857142858e-06



  2%|▏         | 1/42 [00:02<01:48,  2.64s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:20<01:27,  2.59s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.60s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.61s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.60s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.62s/it][A
 33%|███▎      | 14/42 [00:36<01:12,  2.60s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.61s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6492143032096681

	Validation acc: 0.689484126984127

	Validation prec: 0.865966860862896

	Validation rec: 0.689484126984127

	Validation f1: 0.744478531666877
loss: 


  1%|          | 1/83 [00:20<28:32, 20.88s/it][A

tensor(0.6452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:10, 20.88s/it][A

tensor(0.6511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:48, 20.85s/it][A

tensor(0.6584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:25, 20.83s/it][A

tensor(0.6267, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:02, 20.81s/it][A

tensor(0.6528, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:43, 20.82s/it][A

tensor(0.6829, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:19, 20.79s/it][A

tensor(0.6957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<25:58, 20.78s/it][A

tensor(0.6606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:39, 20.80s/it][A

tensor(0.6398, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:20, 20.83s/it][A

tensor(0.7010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:00, 20.83s/it][A

tensor(0.7092, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:09<24:41, 20.86s/it][A

tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:30<24:20, 20.87s/it][A

tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:51<24:02, 20.91s/it][A

tensor(0.7040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:12<23:43, 20.94s/it][A

tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:26, 21.00s/it][A

tensor(0.6180, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:54<23:04, 20.98s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:15<22:43, 20.98s/it][A

tensor(0.6572, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:23, 20.99s/it][A

tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<22:03, 21.01s/it][A

tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:19<21:44, 21.04s/it][A

tensor(0.6724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:40<21:22, 21.03s/it][A

tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:00<20:58, 20.97s/it][A

tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:21<20:36, 20.96s/it][A

tensor(0.6950, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:43<20:20, 21.05s/it][A

tensor(0.6746, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<20:01, 21.07s/it][A

tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:43, 21.13s/it][A

tensor(0.6522, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:20, 21.11s/it][A

tensor(0.7044, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<19:01, 21.14s/it][A

tensor(0.6917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:28<18:39, 21.12s/it][A

tensor(0.6846, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:50<18:20, 21.17s/it][A

tensor(0.6236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:11<17:57, 21.12s/it][A

tensor(0.6267, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:32<17:35, 21.10s/it][A

tensor(0.6665, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:53<17:14, 21.12s/it][A

tensor(0.6667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:54, 21.13s/it][A

tensor(0.6738, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:35<16:29, 21.05s/it][A

tensor(0.6821, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:56<16:06, 21.02s/it][A

tensor(0.7203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:17<15:44, 20.99s/it][A

tensor(0.6592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:38<15:21, 20.94s/it][A

tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<15:00, 20.94s/it][A

tensor(0.6501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:19<14:38, 20.92s/it][A

tensor(0.6545, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:40<14:16, 20.88s/it][A

tensor(0.6687, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:01<13:55, 20.89s/it][A

tensor(0.6494, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:22<13:34, 20.89s/it][A

tensor(0.6493, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:43<13:14, 20.90s/it][A

tensor(0.6612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:05<13:02, 21.14s/it][A

tensor(0.6366, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:45, 21.26s/it][A

tensor(0.6606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:48<12:27, 21.35s/it][A

tensor(0.6812, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:10<12:12, 21.54s/it][A

tensor(0.6785, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:31<11:49, 21.51s/it][A

loss: tensor(0.6377, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:52<11:23, 21.37s/it][A

loss: tensor(0.6581, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:13<10:59, 21.27s/it][A

loss: tensor(0.6708, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:34<10:34, 21.14s/it][A

loss: tensor(0.6824, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:55<10:10, 21.06s/it][A

loss: tensor(0.6596, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:16<09:48, 21.01s/it][A

loss: tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:37<09:27, 21.00s/it][A

tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:58<09:05, 20.96s/it][A

tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:19<08:44, 20.97s/it][A

tensor(0.6643, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:40<08:24, 21.00s/it][A

tensor(0.7010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:01<08:03, 21.00s/it][A

tensor(0.6507, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:22<07:41, 21.00s/it][A

tensor(0.6817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:43<07:20, 20.97s/it][A

tensor(0.6382, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:03<06:58, 20.94s/it][A

tensor(0.6538, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:25<06:39, 21.00s/it][A

tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:46<06:18, 21.01s/it][A

tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:07<05:56, 20.98s/it][A

tensor(0.6667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:27<05:34, 20.94s/it][A

tensor(0.7034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:48<05:13, 20.93s/it][A

tensor(0.6834, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:09<04:52, 20.90s/it][A

tensor(0.6642, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:30<04:31, 20.91s/it][A

tensor(0.6497, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:51<04:10, 20.87s/it][A

loss: tensor(0.6379, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:12<03:49, 20.83s/it][A

tensor(0.6402, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:32<03:27, 20.78s/it][A

loss: tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:53<03:06, 20.77s/it][A

loss: tensor(0.6246, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [26:14<02:46, 20.77s/it][A

loss: tensor(0.6311, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:35<02:25, 20.82s/it][A

tensor(0.6552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:56<02:04, 20.82s/it][A

tensor(0.6840, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:16<01:44, 20.85s/it][A

tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:37<01:23, 20.88s/it][A

tensor(0.6777, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:58<01:02, 20.88s/it][A

tensor(0.6313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:19<00:41, 20.87s/it][A

tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:40<00:20, 20.88s/it][A

tensor(0.7409, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:47<00:00, 20.81s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6273, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6617497270365795

	train acc: 0.6669119660460021

	training prec: 0.8257019188240088

	training rec: 0.6669119660460021

	training f1: 0.7216776340914768

	Current Learning rate:  6.571428571428572e-06



  2%|▏         | 1/42 [00:02<01:48,  2.65s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.63s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.60s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.59s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.61s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.62s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.61s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.60s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.60s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6468037920338767

	Validation acc: 0.6681547619047619

	Validation prec: 0.8675519105477368

	Validation rec: 0.6681547619047619

	Validation f1: 0.7241567706693328
loss: 


  1%|          | 1/83 [00:21<28:51, 21.11s/it][A

tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:24, 21.04s/it][A

tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:07, 21.09s/it][A

tensor(0.6679, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:46, 21.09s/it][A

tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:23, 21.08s/it][A

tensor(0.6393, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:03, 21.09s/it][A

tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:41, 21.07s/it][A

tensor(0.6811, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:14, 20.99s/it][A

tensor(0.6348, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:09<25:54, 21.01s/it][A

tensor(0.6398, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:30<25:33, 21.01s/it][A

tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:51<25:12, 21.00s/it][A

tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:12<24:50, 20.99s/it][A

tensor(0.6257, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:33<24:29, 21.00s/it][A

tensor(0.6653, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:54<24:06, 20.96s/it][A

tensor(0.6207, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:15<23:45, 20.96s/it][A

tensor(0.6896, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:36<23:26, 20.99s/it][A

tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:57<23:06, 21.00s/it][A

tensor(0.5969, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:18<22:45, 21.01s/it][A

tensor(0.6274, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:39<22:25, 21.02s/it][A

tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:00<22:05, 21.04s/it][A

tensor(0.6232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:21<21:46, 21.07s/it][A

tensor(0.6911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:42<21:23, 21.04s/it][A

tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:03<20:59, 21.00s/it][A

tensor(0.6736, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:24<20:37, 20.98s/it][A

tensor(0.6691, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:45<20:16, 20.98s/it][A

tensor(0.7234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:06<19:56, 20.99s/it][A

tensor(0.6512, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:27<19:36, 21.01s/it][A

tensor(0.6848, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:48<19:14, 20.99s/it][A

tensor(0.6562, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:09<18:54, 21.01s/it][A

tensor(0.7037, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:30<18:34, 21.03s/it][A

tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:51<18:13, 21.02s/it][A

tensor(0.6592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:12<17:51, 21.01s/it][A

tensor(0.6652, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:33<17:27, 20.94s/it][A

tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:54<17:03, 20.89s/it][A

tensor(0.6816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:42, 20.88s/it][A

tensor(0.7252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:35<16:20, 20.86s/it][A

tensor(0.6758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:56<15:59, 20.85s/it][A

tensor(0.6453, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:17<15:41, 20.93s/it][A

tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:38<15:21, 20.94s/it][A

tensor(0.7062, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<15:00, 20.95s/it][A

tensor(0.6650, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<14:38, 20.91s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:41<14:15, 20.87s/it][A

tensor(0.6858, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:02<13:53, 20.84s/it][A

tensor(0.7263, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:22<13:33, 20.86s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:43<13:11, 20.82s/it][A

tensor(0.6685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:04<12:53, 20.91s/it][A

tensor(0.6591, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:25<12:32, 20.91s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:46<12:12, 20.94s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:07<11:51, 20.93s/it][A

loss: tensor(0.6755, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:28<11:31, 20.94s/it][A

loss: tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:49<11:09, 20.93s/it][A

loss: tensor(0.6804, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:10<10:48, 20.93s/it][A

loss: tensor(0.6744, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:31<10:27, 20.91s/it][A

loss: tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:52<10:05, 20.89s/it][A

loss: tensor(0.6804, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:13<09:45, 20.91s/it][A

loss: tensor(0.6940, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:33<09:24, 20.92s/it][A

loss: tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:54<09:03, 20.91s/it][A

loss: tensor(0.6634, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:15<08:42, 20.90s/it][A

loss: tensor(0.6459, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:36<08:21, 20.89s/it][A

loss: tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:57<08:00, 20.88s/it][A

loss: tensor(0.6567, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [21:18<07:39, 20.90s/it][A

loss: tensor(0.6536, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:39<07:18, 20.89s/it][A

loss: tensor(0.7258, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [22:00<06:58, 20.91s/it][A

loss: tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:21<06:37, 20.92s/it][A

loss: tensor(0.6342, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:42<06:16, 20.92s/it][A

loss: tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [23:03<05:55, 20.92s/it][A

loss: tensor(0.6137, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:23<05:34, 20.91s/it][A

tensor(0.6294, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:44<05:13, 20.92s/it][A

tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:05<04:53, 20.97s/it][A

tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:26<04:32, 20.98s/it][A

loss: tensor(0.6666, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:47<04:11, 20.98s/it][A

loss: tensor(0.6284, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:09<03:51, 21.04s/it][A

tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:30<03:30, 21.08s/it][A

tensor(0.6844, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:51<03:09, 21.00s/it][A

tensor(0.6905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:11<02:47, 20.96s/it][A

tensor(0.6664, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:32<02:26, 20.94s/it][A

tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:53<02:05, 20.97s/it][A

tensor(0.7151, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:14<01:44, 20.97s/it][A

tensor(0.6603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:35<01:23, 20.96s/it][A

tensor(0.6614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:56<01:02, 20.90s/it][A

tensor(0.6649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:17<00:41, 20.91s/it][A

tensor(0.6578, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:38<00:20, 20.92s/it][A

tensor(0.6279, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:45<00:00, 20.79s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6621211760015373

	train acc: 0.6352683461117196

	training prec: 0.8311718434725957

	training rec: 0.6352683461117196

	training f1: 0.6971967561277671

	Current Learning rate:  6.285714285714286e-06



  2%|▏         | 1/42 [00:02<01:45,  2.58s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.60s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.61s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.62s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.61s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.59s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.60s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6442978183428446

	Validation acc: 0.6800595238095238

	Validation prec: 0.8605305101014933

	Validation rec: 0.6800595238095238

	Validation f1: 0.7317054696618432
loss: 


  1%|          | 1/83 [00:20<28:30, 20.87s/it][A

tensor(0.6400, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:09, 20.86s/it][A

tensor(0.6206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:49, 20.87s/it][A

tensor(0.6279, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:30, 20.90s/it][A

tensor(0.6543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:08, 20.87s/it][A

tensor(0.6666, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:45, 20.85s/it][A

tensor(0.6299, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:23, 20.83s/it][A

tensor(0.6619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:02, 20.84s/it][A

tensor(0.6667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:41, 20.83s/it][A

tensor(0.7166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:18, 20.81s/it][A

tensor(0.6589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<24:59, 20.83s/it][A

tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:42, 20.87s/it][A

tensor(0.6406, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:23, 20.90s/it][A

tensor(0.6830, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<24:01, 20.89s/it][A

tensor(0.6660, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:12<23:40, 20.89s/it][A

tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:33<23:17, 20.86s/it][A

tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:54<22:56, 20.86s/it][A

tensor(0.6453, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:15<22:36, 20.87s/it][A

tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:15, 20.87s/it][A

tensor(0.7253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:57<21:52, 20.83s/it][A

tensor(0.6727, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:17<21:28, 20.78s/it][A

tensor(0.6859, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:38<21:07, 20.78s/it][A

tensor(0.6026, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:59<20:47, 20.79s/it][A

tensor(0.6793, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:20<20:24, 20.76s/it][A

tensor(0.6194, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:40<20:04, 20.77s/it][A

tensor(0.6945, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:01<19:43, 20.77s/it][A

tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:22<19:24, 20.80s/it][A

tensor(0.6570, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:43<19:04, 20.81s/it][A

tensor(0.6456, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:04<18:44, 20.82s/it][A

tensor(0.6436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:25<18:26, 20.87s/it][A

tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:46<18:08, 20.94s/it][A

tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:07<17:48, 20.95s/it][A

tensor(0.6976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:28<17:28, 20.97s/it][A

tensor(0.6603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:49<17:05, 20.93s/it][A

tensor(0.6667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:09<16:43, 20.91s/it][A

tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:30<16:22, 20.90s/it][A

tensor(0.6353, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:51<15:58, 20.85s/it][A

tensor(0.6894, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:12<15:37, 20.83s/it][A

tensor(0.6493, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:33<15:19, 20.89s/it][A

tensor(0.6804, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:54<15:01, 20.98s/it][A

tensor(0.6034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:15<14:40, 20.97s/it][A

tensor(0.6277, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:36<14:20, 21.00s/it][A

tensor(0.6249, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:57<13:57, 20.94s/it][A

tensor(0.6555, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:18<13:34, 20.87s/it][A

tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:38<13:13, 20.87s/it][A

tensor(0.6061, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:00<12:54, 20.93s/it][A

tensor(0.6898, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:20<12:32, 20.91s/it][A

tensor(0.6762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:41<12:10, 20.89s/it][A

tensor(0.6796, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:02<11:51, 20.92s/it][A

tensor(0.6804, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:23<11:30, 20.93s/it][A

tensor(0.6348, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:44<11:10, 20.96s/it][A

tensor(0.6650, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:05<10:50, 20.97s/it][A

tensor(0.6592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:26<10:28, 20.96s/it][A

tensor(0.6956, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:47<10:06, 20.92s/it][A

tensor(0.7033, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:08<09:45, 20.91s/it][A

tensor(0.7205, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:29<09:25, 20.94s/it][A

tensor(0.6198, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:50<09:05, 20.99s/it][A

tensor(0.6305, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:11<08:43, 20.94s/it][A

tensor(0.6829, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:32<08:22, 20.93s/it][A

tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:53<08:01, 20.92s/it][A

tensor(0.6758, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:14<07:40, 20.95s/it][A

tensor(0.6255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:35<07:20, 20.98s/it][A

tensor(0.7099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:55<06:57, 20.90s/it][A

tensor(0.7361, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:16<06:36, 20.88s/it][A

tensor(0.6283, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:37<06:16, 20.91s/it][A

tensor(0.6443, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:58<05:55, 20.91s/it][A

tensor(0.6725, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:19<05:35, 20.96s/it][A

tensor(0.6358, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:40<05:14, 20.99s/it][A

tensor(0.6797, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:01<04:53, 20.95s/it][A

tensor(0.6908, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:22<04:33, 21.02s/it][A
 86%|████████▌ | 71/83 [24:43<04:11, 21.00s/it][A

loss: tensor(0.6399, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [25:04<03:50, 20.98s/it][A

loss: tensor(0.6677, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:25<03:29, 20.94s/it][A

loss: tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:46<03:08, 20.99s/it][A

tensor(0.6423, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:07<02:47, 20.99s/it][A

tensor(0.6434, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:28<02:26, 20.95s/it][A

tensor(0.6108, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:49<02:05, 20.97s/it][A

tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:10<01:44, 20.93s/it][A

tensor(0.6310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:31<01:23, 20.93s/it][A

tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:52<01:02, 20.91s/it][A

tensor(0.6271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:12<00:41, 20.89s/it][A

tensor(0.6882, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:33<00:20, 20.85s/it][A

tensor(0.6351, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:40<00:00, 20.73s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6587357126086592

	train acc: 0.6430380613362542

	training prec: 0.8350415260301737

	training rec: 0.6430380613362542

	training f1: 0.7032548936690294

	Current Learning rate:  6e-06



  2%|▏         | 1/42 [00:02<01:48,  2.64s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.60s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.59s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.61s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.61s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.63s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.61s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.61s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.62s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.59s/it][A
 29%|██▊       | 12/42 [00:31<01:17,  2.60s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:12,  2.60s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.63s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.62s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6427422180062249

	Validation acc: 0.6763392857142857

	Validation prec: 0.8675092100559064

	Validation rec: 0.6763392857142857

	Validation f1: 0.7350948900995499
loss: 


  1%|          | 1/83 [00:20<28:25, 20.80s/it][A

tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:04, 20.80s/it][A

tensor(0.6465, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:48, 20.86s/it][A

tensor(0.6643, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:26, 20.85s/it][A

tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:09, 20.89s/it][A

tensor(0.6394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:48, 20.89s/it][A

tensor(0.6266, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:27, 20.89s/it][A

tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:08, 20.91s/it][A

tensor(0.6284, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:49, 20.94s/it][A

tensor(0.6964, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:28<25:28, 20.93s/it][A

loss: tensor(0.6506, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:49<25:07, 20.94s/it][A

loss: tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:10<24:47, 20.95s/it][A

loss: tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:31<24:27, 20.96s/it][A

loss: tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:52<24:06, 20.96s/it][A

loss: tensor(0.7136, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:47, 20.99s/it][A

tensor(0.6621, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:30, 21.06s/it][A

tensor(0.6445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:08, 21.04s/it][A

tensor(0.6605, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:47, 21.04s/it][A

tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:23, 20.99s/it][A

tensor(0.6247, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<22:01, 20.98s/it][A

tensor(0.6346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:19<21:40, 20.97s/it][A

tensor(0.6947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:40<21:19, 20.98s/it][A

tensor(0.6879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:01<20:59, 20.99s/it][A

tensor(0.6518, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:37, 20.98s/it][A

tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:43<20:18, 21.00s/it][A

tensor(0.6282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:05<19:58, 21.03s/it][A

tensor(0.6730, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:26<19:39, 21.07s/it][A

tensor(0.6698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:47<19:19, 21.07s/it][A

tensor(0.6210, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:08<18:59, 21.09s/it][A

tensor(0.6967, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:29<18:36, 21.07s/it][A

tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:50<18:15, 21.07s/it][A

tensor(0.6253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:11<17:53, 21.06s/it][A

tensor(0.6682, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:32<17:32, 21.04s/it][A

tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:53<17:10, 21.03s/it][A

tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:48, 21.02s/it][A

tensor(0.6974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:35<16:28, 21.03s/it][A

tensor(0.6470, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:56<16:08, 21.05s/it][A

tensor(0.6810, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:17<15:47, 21.05s/it][A

tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:38<15:23, 20.99s/it][A

tensor(0.6346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<15:02, 21.00s/it][A

tensor(0.6372, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<14:43, 21.04s/it][A

tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:41<14:21, 21.02s/it][A

tensor(0.6367, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:02<14:02, 21.06s/it][A

tensor(0.7165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:23<13:40, 21.05s/it][A

tensor(0.6714, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:44<13:19, 21.05s/it][A

tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:06<12:59, 21.06s/it][A

tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:37, 21.03s/it][A

tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:48<12:17, 21.08s/it][A

tensor(0.6511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:09<11:56, 21.06s/it][A

tensor(0.6640, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:30<11:34, 21.06s/it][A

tensor(0.6566, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:51<11:13, 21.06s/it][A

tensor(0.7126, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:12<10:54, 21.10s/it][A

tensor(0.6826, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:33<10:32, 21.07s/it][A

loss: tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:54<10:09, 21.01s/it][A

loss: tensor(0.6798, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:15<09:47, 21.00s/it][A

loss: tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:36<09:27, 21.01s/it][A

tensor(0.6336, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:57<09:05, 20.98s/it][A

tensor(0.6357, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:18<08:44, 20.97s/it][A

tensor(0.6571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:39<08:22, 20.92s/it][A

tensor(0.6268, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:59<08:00, 20.90s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:21<07:41, 20.98s/it][A

tensor(0.6588, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:41<07:19, 20.94s/it][A

tensor(0.6541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:02<06:59, 20.96s/it][A

tensor(0.6971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:23<06:37, 20.92s/it][A

tensor(0.6016, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:44<06:16, 20.89s/it][A

tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:05<05:56, 20.95s/it][A

tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:26<05:35, 20.98s/it][A

tensor(0.6251, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:47<05:14, 20.95s/it][A

loss: tensor(0.6147, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [24:08<04:52, 20.92s/it][A

loss: tensor(0.6628, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:29<04:31, 20.90s/it][A

loss: tensor(0.6576, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:50<04:11, 20.99s/it][A

loss: tensor(0.6615, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:11<03:50, 20.98s/it][A

tensor(0.7032, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:32<03:30, 21.04s/it][A

tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:53<03:09, 21.05s/it][A

tensor(0.6292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:14<02:48, 21.05s/it][A

tensor(0.6913, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:35<02:27, 21.07s/it][A

tensor(0.6478, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:56<02:06, 21.06s/it][A

tensor(0.6560, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:17<01:45, 21.02s/it][A

tensor(0.6649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:38<01:23, 20.97s/it][A

tensor(0.6808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:59<01:02, 20.97s/it][A

tensor(0.6382, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:20<00:41, 20.97s/it][A

tensor(0.6884, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:41<00:20, 20.96s/it][A

tensor(0.6674, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:48<00:00, 20.83s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5916, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6574062669133566

	train acc: 0.6275328587075575

	training prec: 0.832503442145532

	training rec: 0.6275328587075575

	training f1: 0.6902367650046031

	Current Learning rate:  5.7142857142857145e-06



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.61s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.62s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.63s/it][A
 14%|█▍        | 6/42 [00:15<01:33,  2.59s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:20<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.64s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.64s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.64s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.65s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.6402392898287091

	Validation acc: 0.6631944444444444

	Validation prec: 0.8711301655796999

	Validation rec: 0.6631944444444444

	Validation f1: 0.7213484697911606
loss: 


  1%|          | 1/83 [00:21<28:49, 21.09s/it][A

tensor(0.6941, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:21, 21.01s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:56, 20.96s/it][A

tensor(0.6386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:39, 21.01s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:14, 20.96s/it][A

tensor(0.6274, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:51, 20.93s/it][A

tensor(0.6445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:31, 20.95s/it][A

tensor(0.7172, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:12, 20.96s/it][A

tensor(0.6259, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:56, 21.03s/it][A

tensor(0.6194, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:30<25:37, 21.07s/it][A

tensor(0.6920, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:51<25:15, 21.04s/it][A

tensor(0.7047, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:12<24:52, 21.01s/it][A

tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:27, 20.97s/it][A

tensor(0.6392, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:05, 20.95s/it][A

tensor(0.5994, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:43, 20.94s/it][A

tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:23, 20.96s/it][A

tensor(0.6034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:01, 20.94s/it][A

tensor(0.6678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:40, 20.93s/it][A

tensor(0.6208, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:22, 20.97s/it][A

tensor(0.6695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<22:03, 21.01s/it][A

tensor(0.6407, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:42, 21.00s/it][A

tensor(0.6923, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:18, 20.96s/it][A

tensor(0.6536, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<20:57, 20.95s/it][A

tensor(0.6598, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:23<20:35, 20.94s/it][A

tensor(0.6594, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:44<20:15, 20.96s/it][A

tensor(0.7096, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [09:05<19:52, 20.91s/it][A

loss: tensor(0.6262, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:25<19:29, 20.88s/it][A

loss: tensor(0.6625, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:46<19:07, 20.87s/it][A

loss: tensor(0.6479, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [10:07<18:45, 20.84s/it][A

loss: tensor(0.6441, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:28<18:24, 20.84s/it][A

loss: tensor(0.6601, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:49<18:01, 20.80s/it][A

loss: tensor(0.6059, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:09<17:39, 20.77s/it][A

loss: tensor(0.6703, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:30<17:18, 20.77s/it][A

loss: tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:51<16:58, 20.79s/it][A

loss: tensor(0.6609, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:12<16:37, 20.79s/it][A

loss: tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:33<16:18, 20.83s/it][A

loss: tensor(0.6755, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:53<15:56, 20.80s/it][A

loss: tensor(0.6696, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:14<15:36, 20.81s/it][A

loss: tensor(0.6350, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:35<15:14, 20.79s/it][A

loss: tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:56<14:53, 20.77s/it][A

loss: tensor(0.6755, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:16<14:30, 20.73s/it][A

loss: tensor(0.6720, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:37<14:10, 20.74s/it][A

loss: tensor(0.6902, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:58<13:50, 20.77s/it][A

tensor(0.6603, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:19<13:29, 20.77s/it][A

loss: tensor(0.6506, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:39<13:08, 20.75s/it][A

loss: tensor(0.6175, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [16:00<12:46, 20.70s/it][A

loss: tensor(0.6349, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:21<12:24, 20.69s/it][A

loss: tensor(0.6313, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:41<12:05, 20.73s/it][A

loss: tensor(0.6657, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:02<11:44, 20.73s/it][A

loss: tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:23<11:23, 20.72s/it][A

loss: tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:44<11:02, 20.71s/it][A

loss: tensor(0.6959, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:04<10:42, 20.74s/it][A

loss: tensor(0.7164, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:25<10:21, 20.72s/it][A

loss: tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:46<10:01, 20.75s/it][A

loss: tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:07<09:40, 20.74s/it][A

loss: tensor(0.7030, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:27<09:20, 20.77s/it][A

loss: tensor(0.6834, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:48<09:00, 20.79s/it][A

loss: tensor(0.5877, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:09<08:40, 20.81s/it][A

loss: tensor(0.6506, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:30<08:19, 20.82s/it][A

loss: tensor(0.6791, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:51<07:59, 20.87s/it][A

loss: tensor(0.6542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:12<07:40, 20.91s/it][A

tensor(0.6563, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:33<07:20, 20.96s/it][A

tensor(0.6896, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:54<06:59, 20.99s/it][A

tensor(0.6548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:15<06:38, 20.96s/it][A

tensor(0.6511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:36<06:16, 20.92s/it][A

tensor(0.6211, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:57<05:55, 20.91s/it][A

tensor(0.6976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:18<05:34, 20.93s/it][A

tensor(0.6812, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:39<05:13, 20.91s/it][A

tensor(0.6380, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [23:59<04:52, 20.87s/it][A

loss: tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:20<04:31, 20.87s/it][A

tensor(0.6694, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:41<04:10, 20.88s/it][A

tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:02<03:49, 20.83s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:23<03:28, 20.82s/it][A

tensor(0.6444, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:44<03:07, 20.86s/it][A

tensor(0.6373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:05<02:47, 20.88s/it][A

tensor(0.5900, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:25<02:26, 20.89s/it][A

tensor(0.6574, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:47<02:05, 20.97s/it][A

tensor(0.6460, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:08<01:44, 20.98s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:29<01:24, 21.02s/it][A

tensor(0.6202, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:50<01:03, 21.01s/it][A

tensor(0.6443, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:11<00:42, 21.00s/it][A

tensor(0.6027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:32<00:21, 21.07s/it][A

tensor(0.6176, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:39<00:00, 20.72s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6319, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6529772583260594

	train acc: 0.6680414841182913

	training prec: 0.8312749555154106

	training rec: 0.6680414841182913

	training f1: 0.7228409674472079

	Current Learning rate:  5.428571428571429e-06



  2%|▏         | 1/42 [00:02<01:47,  2.62s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:08<01:44,  2.68s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.67s/it][A
 14%|█▍        | 6/42 [00:16<01:36,  2.67s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.67s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:20,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.66s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:39<01:12,  2.67s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.65s/it][A
 43%|████▎     | 18/42 [00:47<01:04,  2.67s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.66s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6384294416223254

	Validation acc: 0.6644345238095238

	Validation prec: 0.8703306324569565

	Validation rec: 0.6644345238095238

	Validation f1: 0.7233514712029349
loss: 


  1%|          | 1/83 [00:20<28:17, 20.70s/it][A

tensor(0.6199, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:01, 20.76s/it][A

tensor(0.6627, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:43, 20.79s/it][A

tensor(0.6278, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:22, 20.79s/it][A

tensor(0.7007, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:05, 20.85s/it][A

tensor(0.6482, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:44, 20.84s/it][A

tensor(0.6681, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:27, 20.89s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:09, 20.92s/it][A

tensor(0.6719, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:47, 20.91s/it][A

tensor(0.6781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:26, 20.91s/it][A

tensor(0.6636, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:02, 20.87s/it][A

tensor(0.6979, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:41, 20.87s/it][A

tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:21, 20.88s/it][A

tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<23:59, 20.87s/it][A

tensor(0.6630, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:41, 20.90s/it][A

tensor(0.6189, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:33<23:16, 20.85s/it][A

tensor(0.6738, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:54<22:55, 20.84s/it][A

tensor(0.6073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:15<22:34, 20.84s/it][A

tensor(0.6672, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:11, 20.80s/it][A

tensor(0.6211, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:56<21:49, 20.79s/it][A

tensor(0.6689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:17<21:31, 20.83s/it][A

tensor(0.6267, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:38<21:11, 20.85s/it][A

tensor(0.6058, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:59<20:51, 20.87s/it][A

tensor(0.6976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:20<20:29, 20.84s/it][A

tensor(0.6595, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:41<20:09, 20.85s/it][A

tensor(0.6555, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:02<19:47, 20.84s/it][A

tensor(0.6416, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:22<19:28, 20.86s/it][A

tensor(0.6683, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:43<19:04, 20.81s/it][A

tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:04<18:44, 20.82s/it][A

tensor(0.6773, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:25<18:26, 20.88s/it][A

tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:46<18:05, 20.87s/it][A

tensor(0.6139, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:07<17:42, 20.84s/it][A

tensor(0.6379, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:28<17:22, 20.85s/it][A

tensor(0.6643, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:48<17:01, 20.84s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:09<16:40, 20.84s/it][A

tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:30<16:21, 20.88s/it][A

tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:51<15:58, 20.85s/it][A

tensor(0.6189, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:12<15:38, 20.86s/it][A

tensor(0.6964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:33<15:18, 20.88s/it][A

tensor(0.6044, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:54<14:58, 20.89s/it][A

tensor(0.7084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:15<14:37, 20.88s/it][A

tensor(0.6425, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:35<14:16, 20.88s/it][A

tensor(0.6575, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:56<13:54, 20.87s/it][A

tensor(0.6796, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:17<13:35, 20.91s/it][A

tensor(0.5619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:38<13:14, 20.90s/it][A

tensor(0.6867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:59<12:53, 20.89s/it][A

tensor(0.6916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:20<12:33, 20.93s/it][A

tensor(0.6879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:41<12:12, 20.92s/it][A

tensor(0.6492, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:02<11:50, 20.89s/it][A

tensor(0.6441, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:23<11:29, 20.88s/it][A

tensor(0.6657, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:43<11:07, 20.87s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:04<10:47, 20.90s/it][A

tensor(0.6341, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:25<10:27, 20.90s/it][A

tensor(0.6795, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:46<10:06, 20.92s/it][A

tensor(0.6472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:07<09:46, 20.94s/it][A

tensor(0.6796, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:28<09:25, 20.93s/it][A

tensor(0.6484, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:49<09:04, 20.95s/it][A

tensor(0.6703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:10<08:43, 20.94s/it][A

tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:31<08:22, 20.94s/it][A

tensor(0.6826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:52<08:01, 20.91s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:13<07:39, 20.88s/it][A

tensor(0.6680, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:33<07:18, 20.86s/it][A

tensor(0.6683, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:54<06:56, 20.83s/it][A

tensor(0.6649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:15<06:35, 20.81s/it][A

tensor(0.6514, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:36<06:14, 20.78s/it][A

tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:57<05:53, 20.80s/it][A

tensor(0.5907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:17<05:32, 20.81s/it][A

tensor(0.6763, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:38<05:10, 20.72s/it][A

loss: tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:59<04:49, 20.70s/it][A

tensor(0.6909, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:19<04:29, 20.76s/it][A

tensor(0.6608, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:40<04:09, 20.77s/it][A

tensor(0.6857, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:01<03:49, 20.83s/it][A

tensor(0.6369, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:22<03:28, 20.85s/it][A

tensor(0.7145, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:43<03:08, 20.91s/it][A

tensor(0.6157, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:04<02:46, 20.87s/it][A

tensor(0.6585, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:25<02:25, 20.85s/it][A

tensor(0.6657, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:46<02:05, 20.85s/it][A

tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:06<01:44, 20.80s/it][A

tensor(0.6366, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:27<01:23, 20.87s/it][A

tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:48<01:02, 20.85s/it][A

tensor(0.6497, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:09<00:41, 20.88s/it][A

tensor(0.6315, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:30<00:20, 20.89s/it][A

tensor(0.6392, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:37<00:00, 20.69s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7625, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6568660506282944

	train acc: 0.6461014512595837

	training prec: 0.8295078848814038

	training rec: 0.6461014512595837

	training f1: 0.7052926216401629

	Current Learning rate:  5.142857142857142e-06



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.61s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.62s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.61s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.62s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6364724054223015

	Validation acc: 0.6763392857142857

	Validation prec: 0.8691709919316117

	Validation rec: 0.6763392857142857

	Validation f1: 0.734307870798451
loss: 


  1%|          | 1/83 [00:20<28:33, 20.90s/it][A

tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:15, 20.94s/it][A

tensor(0.6673, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:51, 20.90s/it][A

tensor(0.6530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:27, 20.86s/it][A

tensor(0.6610, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:03, 20.82s/it][A

tensor(0.6176, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:41, 20.80s/it][A

tensor(0.6439, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:23, 20.83s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:06, 20.89s/it][A

tensor(0.6359, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:45, 20.89s/it][A

tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:23, 20.87s/it][A

tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:07, 20.93s/it][A

tensor(0.6580, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:43, 20.90s/it][A

tensor(0.6627, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:30<23:56, 20.52s/it][A

tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:50<23:39, 20.57s/it][A

loss: tensor(0.6310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:11<23:26, 20.68s/it][A

tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:32<23:09, 20.74s/it][A

tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:53<22:56, 20.85s/it][A

tensor(0.6261, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:14<22:34, 20.83s/it][A

loss: tensor(0.6530, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:35<22:11, 20.80s/it][A

loss: tensor(0.6429, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:56<21:51, 20.82s/it][A

loss: tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:17<21:30, 20.82s/it][A

loss: tensor(0.6647, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:37<21:10, 20.83s/it][A

loss: tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:58<20:47, 20.79s/it][A

loss: tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:19<20:24, 20.76s/it][A

loss: tensor(0.6930, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:40<20:03, 20.76s/it][A

loss: tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [09:00<19:41, 20.73s/it][A

loss: tensor(0.6345, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:21<19:19, 20.70s/it][A

loss: tensor(0.5793, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:41<18:57, 20.69s/it][A

loss: tensor(0.6288, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [10:02<18:36, 20.68s/it][A

loss: tensor(0.6757, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:23<18:17, 20.71s/it][A

tensor(0.6207, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:44<17:57, 20.72s/it][A

tensor(0.6778, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:05<17:40, 20.79s/it][A

tensor(0.6584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:26<17:23, 20.86s/it][A

tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:47<17:03, 20.88s/it][A

tensor(0.7025, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:08<16:43, 20.90s/it][A

tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:29<16:24, 20.95s/it][A

tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:50<16:03, 20.95s/it][A

tensor(0.6384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:10<15:42, 20.94s/it][A

tensor(0.6373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:31<15:22, 20.96s/it][A

tensor(0.6300, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:52<15:00, 20.95s/it][A

tensor(0.7017, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:13<14:39, 20.95s/it][A

loss: tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:34<14:17, 20.92s/it][A

loss: tensor(0.5992, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:55<13:55, 20.89s/it][A

loss: tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:16<13:35, 20.90s/it][A

loss: tensor(0.6418, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:37<13:12, 20.84s/it][A

loss: tensor(0.6579, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:57<12:49, 20.79s/it][A

loss: tensor(0.6563, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:18<12:27, 20.76s/it][A

loss: tensor(0.6200, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:39<12:05, 20.72s/it][A

loss: tensor(0.6976, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [16:59<11:44, 20.71s/it][A

loss: tensor(0.6152, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:20<11:23, 20.70s/it][A

loss: tensor(0.6964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:41<11:04, 20.76s/it][A

tensor(0.6493, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:02<10:44, 20.78s/it][A

tensor(0.6327, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:23<10:25, 20.83s/it][A

loss: tensor(0.6501, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:44<10:05, 20.86s/it][A

loss: tensor(0.6971, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:04<09:44, 20.86s/it][A

loss: tensor(0.6645, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:25<09:23, 20.88s/it][A

loss: tensor(0.6433, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:47<09:05, 20.98s/it][A

tensor(0.6213, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:08<08:45, 21.01s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:29<08:24, 21.02s/it][A

tensor(0.6720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:50<08:03, 21.04s/it][A

tensor(0.6345, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:11<07:42, 21.01s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:32<07:19, 20.95s/it][A

tensor(0.6000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:52<06:59, 20.95s/it][A

tensor(0.6218, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:13<06:38, 20.95s/it][A

tensor(0.6478, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:34<06:17, 20.96s/it][A

tensor(0.6774, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [22:55<05:54, 20.87s/it][A

loss: tensor(0.7058, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:16<05:34, 20.90s/it][A

tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:37<05:13, 20.90s/it][A

loss: tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [23:58<04:52, 20.87s/it][A

loss: tensor(0.6852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:19<04:31, 20.88s/it][A

tensor(0.6109, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:40<04:10, 20.90s/it][A

tensor(0.6708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:01<03:50, 20.94s/it][A

tensor(0.6570, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:22<03:29, 20.96s/it][A

tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:43<03:08, 20.95s/it][A

tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:04<02:47, 20.97s/it][A

tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:25<02:27, 21.05s/it][A

tensor(0.6279, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:46<02:06, 21.07s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:07<01:45, 21.08s/it][A

tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:28<01:24, 21.11s/it][A

tensor(0.6445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:49<01:03, 21.12s/it][A

tensor(0.5965, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:10<00:42, 21.11s/it][A

tensor(0.6775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:32<00:21, 21.12s/it][A

tensor(0.6303, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:39<00:00, 20.72s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6497761398912912

	train acc: 0.6571399233296823

	training prec: 0.8386587613538129

	training rec: 0.6571399233296823

	training f1: 0.7155677255191104

	Current Learning rate:  4.857142857142858e-06



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.67s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.67s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.64s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.63s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6345671330179486

	Validation acc: 0.683531746031746

	Validation prec: 0.8655211432988533

	Validation rec: 0.683531746031746

	Validation f1: 0.7380641672821799
loss: 


  1%|          | 1/83 [00:21<28:50, 21.10s/it][A

tensor(0.6962, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:20, 21.00s/it][A

tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:02, 21.04s/it][A

tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:42, 21.04s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:22, 21.05s/it][A

tensor(0.5992, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:01, 21.06s/it][A

tensor(0.7085, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:40, 21.06s/it][A

tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:18, 21.05s/it][A

tensor(0.6253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:09<25:58, 21.06s/it][A

tensor(0.7245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:30<25:36, 21.05s/it][A

tensor(0.6234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:51<25:16, 21.06s/it][A

tensor(0.6257, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:12<24:59, 21.12s/it][A

tensor(0.6454, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:33<24:38, 21.13s/it][A

tensor(0.6596, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:55<24:17, 21.13s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:16<23:57, 21.14s/it][A

tensor(0.6581, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:37<23:37, 21.16s/it][A

tensor(0.6732, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:58<23:17, 21.18s/it][A

tensor(0.6172, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:19<22:58, 21.21s/it][A

tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:41<22:42, 21.28s/it][A

tensor(0.6318, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [07:02<22:19, 21.26s/it][A

loss: tensor(0.6657, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:23<21:56, 21.23s/it][A

loss: tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:44<21:32, 21.19s/it][A

loss: tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [08:05<21:07, 21.13s/it][A

loss: tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:26<20:44, 21.10s/it][A

loss: tensor(0.6482, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:47<20:22, 21.08s/it][A

loss: tensor(0.6074, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [09:09<20:02, 21.09s/it][A

loss: tensor(0.6824, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:30<19:40, 21.09s/it][A

loss: tensor(0.6578, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:51<19:21, 21.12s/it][A

tensor(0.7052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:12<19:01, 21.13s/it][A

tensor(0.6791, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:33<18:42, 21.19s/it][A

tensor(0.6065, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:54<18:17, 21.10s/it][A

tensor(0.6740, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:15<17:53, 21.06s/it][A

tensor(0.6913, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:36<17:29, 21.00s/it][A

tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:57<17:06, 20.96s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:18<16:47, 20.99s/it][A

tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:39<16:26, 20.99s/it][A

tensor(0.7218, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:00<16:03, 20.95s/it][A

tensor(0.6407, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:21<15:41, 20.91s/it][A

tensor(0.6456, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:41<15:19, 20.90s/it][A

tensor(0.6474, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:02<14:58, 20.89s/it][A

tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:23<14:36, 20.88s/it][A

tensor(0.6747, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:44<14:16, 20.88s/it][A

tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:05<13:55, 20.89s/it][A

tensor(0.6241, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:26<13:35, 20.91s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:47<13:14, 20.90s/it][A

tensor(0.7004, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:08<12:55, 20.96s/it][A

tensor(0.6397, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:29<12:35, 21.00s/it][A

tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:50<12:17, 21.07s/it][A

tensor(0.6690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:11<11:57, 21.10s/it][A

tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:33<11:37, 21.14s/it][A

tensor(0.6186, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:54<11:15, 21.10s/it][A

tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:15<10:53, 21.08s/it][A

tensor(0.6616, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:36<10:32, 21.08s/it][A

tensor(0.6303, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:57<10:12, 21.12s/it][A

tensor(0.7294, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:18<09:52, 21.15s/it][A

tensor(0.7018, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:39<09:30, 21.15s/it][A

tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:00<09:09, 21.12s/it][A

tensor(0.6733, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:21<08:47, 21.10s/it][A

tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:43<08:26, 21.08s/it][A

tensor(0.7002, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:04<08:04, 21.06s/it][A

tensor(0.6255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:24<07:42, 21.01s/it][A

tensor(0.7244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:45<07:21, 21.02s/it][A

tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:06<06:59, 20.98s/it][A

tensor(0.6236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:27<06:39, 21.00s/it][A

tensor(0.7047, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:48<06:18, 21.02s/it][A

tensor(0.7041, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:09<05:57, 21.02s/it][A

tensor(0.6486, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:30<05:36, 21.01s/it][A

tensor(0.6453, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:51<05:13, 20.92s/it][A

tensor(0.6618, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:12<04:52, 20.87s/it][A

tensor(0.6527, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:33<04:31, 20.89s/it][A

tensor(0.7091, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:54<04:11, 20.93s/it][A

tensor(0.6497, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:15<03:50, 20.93s/it][A

tensor(0.6357, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:36<03:29, 20.93s/it][A

tensor(0.7068, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:57<03:08, 20.93s/it][A

tensor(0.6141, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:18<02:47, 20.96s/it][A

tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:39<02:26, 20.94s/it][A

tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:00<02:05, 20.97s/it][A

tensor(0.6340, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:21<01:44, 20.96s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:42<01:23, 20.98s/it][A

tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:03<01:02, 20.96s/it][A

tensor(0.6431, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:24<00:41, 21.00s/it][A

tensor(0.6349, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:45<00:21, 21.01s/it][A

tensor(0.6848, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:52<00:00, 20.87s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6077, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6569397679294449

	train acc: 0.656335569550931

	training prec: 0.8294038130105974

	training rec: 0.656335569550931

	training f1: 0.7141344533228059

	Current Learning rate:  4.571428571428572e-06



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.61s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.60s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.60s/it][A
 19%|█▉        | 8/42 [00:20<01:29,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.62s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.61s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.62s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6341737522965386

	Validation acc: 0.6560019841269841

	Validation prec: 0.8686970213871306

	Validation rec: 0.6560019841269841

	Validation f1: 0.714178417833182
loss: 


  1%|          | 1/83 [00:21<28:50, 21.10s/it][A

tensor(0.5895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:28, 21.09s/it][A

tensor(0.6427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:07, 21.10s/it][A

tensor(0.6582, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:44, 21.06s/it][A

tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:45<27:20, 21.03s/it][A

loss: tensor(0.7387, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 6/83 [02:06<26:57, 21.01s/it][A

loss: tensor(0.6503, device='cuda:0', grad_fn=<NllLossBackward>)



  8%|▊         | 7/83 [02:27<26:31, 20.94s/it][A

loss: tensor(0.6795, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:47<26:08, 20.91s/it][A

loss: tensor(0.5915, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:08<25:41, 20.84s/it][A

loss: tensor(0.6146, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:29<25:20, 20.83s/it][A

loss: tensor(0.6598, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:50<25:03, 20.89s/it][A

loss: tensor(0.6568, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:11<24:42, 20.87s/it][A

loss: tensor(0.6295, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:32<24:19, 20.84s/it][A

loss: tensor(0.6983, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:52<23:55, 20.81s/it][A

loss: tensor(0.6831, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:40, 20.90s/it][A

tensor(0.6473, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:20, 20.91s/it][A

tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<23:01, 20.93s/it][A

tensor(0.6985, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:39, 20.91s/it][A

tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:18, 20.92s/it][A

tensor(0.6783, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<21:59, 20.94s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:19<21:38, 20.94s/it][A

tensor(0.6747, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:40<21:18, 20.96s/it][A

tensor(0.6236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:01<20:57, 20.95s/it][A

tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:34, 20.93s/it][A

tensor(0.6515, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:43<20:14, 20.95s/it][A

tensor(0.6808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<19:56, 20.99s/it][A

tensor(0.6156, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:36, 21.01s/it][A

tensor(0.7329, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:16, 21.03s/it][A

tensor(0.6112, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<18:55, 21.02s/it][A

tensor(0.6248, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:28<18:35, 21.04s/it][A

tensor(0.6514, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:49<18:14, 21.05s/it][A

tensor(0.6901, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:10<17:52, 21.03s/it][A

tensor(0.6518, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:30, 21.02s/it][A

tensor(0.6316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:52<17:10, 21.03s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:13<16:49, 21.02s/it][A

tensor(0.6290, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:34<16:26, 21.00s/it][A

tensor(0.6800, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:55<16:06, 21.02s/it][A

tensor(0.6773, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:16<15:45, 21.02s/it][A

tensor(0.6943, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:25, 21.03s/it][A

tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:58<15:04, 21.04s/it][A

tensor(0.6194, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:19<14:43, 21.04s/it][A

tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:40<14:22, 21.03s/it][A

tensor(0.6282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:02<14:02, 21.06s/it][A

tensor(0.6896, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:23<13:42, 21.08s/it][A

tensor(0.6199, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:44<13:21, 21.10s/it][A

tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:05<13:00, 21.09s/it][A

tensor(0.6188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:39, 21.09s/it][A

tensor(0.6535, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:47<12:17, 21.06s/it][A

tensor(0.6476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:08<11:56, 21.07s/it][A

tensor(0.6067, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:29<11:34, 21.06s/it][A

tensor(0.6255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:50<11:13, 21.06s/it][A

tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:11<10:53, 21.07s/it][A

tensor(0.6455, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:32<10:32, 21.08s/it][A

tensor(0.6791, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:53<10:11, 21.08s/it][A

tensor(0.7163, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:14<09:49, 21.07s/it][A

tensor(0.6665, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:35<09:28, 21.04s/it][A

tensor(0.6578, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:56<09:06, 21.03s/it][A

tensor(0.6574, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:18<08:46, 21.06s/it][A

tensor(0.6596, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:39<08:26, 21.09s/it][A

tensor(0.6450, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:00<08:06, 21.14s/it][A

tensor(0.6867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:21<07:45, 21.16s/it][A

tensor(0.5957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:42<07:23, 21.14s/it][A

tensor(0.6882, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [22:03<07:02, 21.12s/it][A

loss: tensor(0.6629, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:24<06:40, 21.06s/it][A

loss: tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:45<06:18, 21.01s/it][A

loss: tensor(0.6377, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [23:06<05:56, 20.99s/it][A

loss: tensor(0.6587, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:27<05:36, 21.02s/it][A

loss: tensor(0.6739, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:48<05:15, 21.04s/it][A

tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:09<04:54, 21.04s/it][A

tensor(0.6475, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:30<04:33, 21.03s/it][A

tensor(0.6411, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:51<04:12, 21.04s/it][A

tensor(0.6706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:13<03:51, 21.07s/it][A

tensor(0.6706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:34<03:30, 21.06s/it][A

tensor(0.6292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:55<03:09, 21.07s/it][A

tensor(0.6302, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:16<02:48, 21.07s/it][A

tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:37<02:27, 21.09s/it][A

tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:58<02:06, 21.11s/it][A

tensor(0.6744, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:19<01:45, 21.14s/it][A

tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:40<01:24, 21.12s/it][A

tensor(0.6492, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:01<01:03, 21.11s/it][A

tensor(0.6143, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:23<00:42, 21.10s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:44<00:21, 21.12s/it][A

tensor(0.6397, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:51<00:00, 20.86s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6294, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6528147028153202

	train acc: 0.6307331599123768

	training prec: 0.8307685963225867

	training rec: 0.6307331599123768

	training f1: 0.694047488008971

	Current Learning rate:  4.2857142857142855e-06



  2%|▏         | 1/42 [00:02<01:48,  2.65s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.68s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.63s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.61s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6321888736316136

	Validation acc: 0.6909722222222222

	Validation prec: 0.8642685361932937

	Validation rec: 0.6909722222222222

	Validation f1: 0.7447795305266866
loss: 


  1%|          | 1/83 [00:20<28:35, 20.93s/it][A

tensor(0.6186, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:15, 20.93s/it][A

tensor(0.6593, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:53, 20.91s/it][A

tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:26, 20.85s/it][A

tensor(0.6233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<26:56, 20.73s/it][A

tensor(0.6482, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:42, 20.81s/it][A

tensor(0.6248, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:24, 20.84s/it][A

tensor(0.6295, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:04, 20.86s/it][A

tensor(0.6377, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:47, 20.91s/it][A

tensor(0.6043, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:24, 20.89s/it][A

tensor(0.6078, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:04, 20.89s/it][A

tensor(0.6553, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:42, 20.88s/it][A

tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:20, 20.86s/it][A

tensor(0.6644, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<23:59, 20.86s/it][A

tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:12<23:38, 20.86s/it][A

tensor(0.6482, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:33<23:17, 20.85s/it][A

tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:54<22:57, 20.88s/it][A

tensor(0.6510, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:15<22:40, 20.93s/it][A

tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:15, 20.87s/it][A

tensor(0.6649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:57<21:52, 20.83s/it][A

tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:18<21:31, 20.84s/it][A

tensor(0.6478, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:38<21:11, 20.84s/it][A

tensor(0.7078, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:59<20:49, 20.82s/it][A

tensor(0.6129, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:20<20:29, 20.83s/it][A

tensor(0.6713, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:41<20:10, 20.87s/it][A

tensor(0.6560, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:02<19:48, 20.85s/it][A

tensor(0.6577, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:23<19:26, 20.83s/it][A

tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:43<19:03, 20.79s/it][A

tensor(0.6169, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:04<18:42, 20.78s/it][A

tensor(0.6426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:25<18:20, 20.76s/it][A

tensor(0.6294, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:46<18:00, 20.78s/it][A

tensor(0.6666, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:06<17:41, 20.81s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:27<17:22, 20.85s/it][A

tensor(0.6537, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:48<17:03, 20.88s/it][A

tensor(0.6325, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:09<16:42, 20.90s/it][A

tensor(0.6694, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:30<16:23, 20.93s/it][A

tensor(0.6169, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:51<16:01, 20.91s/it][A

tensor(0.6707, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:12<15:44, 20.98s/it][A

tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:33<15:23, 20.99s/it][A

tensor(0.6160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:54<15:03, 21.01s/it][A

tensor(0.6904, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:16<14:43, 21.04s/it][A

tensor(0.6205, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:36<14:21, 21.02s/it][A

tensor(0.6693, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:58<14:02, 21.07s/it][A

tensor(0.6739, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:19<13:42, 21.08s/it][A

tensor(0.7193, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:40<13:22, 21.13s/it][A

tensor(0.6614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:01<13:01, 21.11s/it][A

tensor(0.6607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:22<12:41, 21.14s/it][A

tensor(0.6318, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:43<12:17, 21.08s/it][A

tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:04<11:56, 21.09s/it][A

tensor(0.6342, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:25<11:35, 21.08s/it][A

tensor(0.6192, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:46<11:13, 21.06s/it][A

tensor(0.6524, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:07<10:52, 21.05s/it][A

tensor(0.6391, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:28<10:31, 21.05s/it][A

loss: tensor(0.6268, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:49<10:09, 21.02s/it][A

loss: tensor(0.6750, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:10<09:47, 20.99s/it][A

loss: tensor(0.6403, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:31<09:26, 20.98s/it][A

loss: tensor(0.6861, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:52<09:04, 20.96s/it][A

tensor(0.6524, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:13<08:44, 20.96s/it][A

tensor(0.6399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:34<08:22, 20.94s/it][A

tensor(0.6351, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:55<08:01, 20.94s/it][A

tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:16<07:41, 20.99s/it][A

tensor(0.6738, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:37<07:20, 20.97s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:58<07:00, 21.02s/it][A

loss: tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:19<06:38, 21.00s/it][A

tensor(0.6460, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:40<06:18, 21.00s/it][A

loss: tensor(0.6270, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:01<05:57, 21.00s/it][A

tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:22<05:36, 21.04s/it][A

tensor(0.7031, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:43<05:16, 21.08s/it][A

tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:05<04:55, 21.09s/it][A

tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:26<04:34, 21.12s/it][A

tensor(0.6120, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:47<04:13, 21.11s/it][A

tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:08<03:52, 21.14s/it][A

tensor(0.6785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:29<03:31, 21.14s/it][A

tensor(0.6707, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:50<03:10, 21.14s/it][A

tensor(0.6369, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:11<02:49, 21.14s/it][A

tensor(0.5978, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:33<02:27, 21.14s/it][A

tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:54<02:06, 21.15s/it][A

tensor(0.6839, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:15<01:45, 21.19s/it][A

tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:36<01:24, 21.11s/it][A

tensor(0.6179, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:57<01:03, 21.05s/it][A

tensor(0.6411, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:18<00:41, 20.99s/it][A

tensor(0.6472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:39<00:20, 20.96s/it][A

tensor(0.6324, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:46<00:00, 20.80s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6460449889481786

	train acc: 0.664071056955093

	training prec: 0.8339192351367463

	training rec: 0.664071056955093

	training f1: 0.7203724681818725

	Current Learning rate:  4.000000000000001e-06



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.67s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.67s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:37<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.65s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.64s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.6318867490405128

	Validation acc: 0.6770833333333334

	Validation prec: 0.8580847407921985

	Validation rec: 0.6770833333333334

	Validation f1: 0.7319875685990292
loss: 


  1%|          | 1/83 [00:20<28:28, 20.83s/it][A

tensor(0.6490, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:20, 20.99s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:58, 20.98s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:42, 21.04s/it][A

tensor(0.6181, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:23, 21.07s/it][A

tensor(0.6032, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<26:59, 21.04s/it][A

tensor(0.6223, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:33, 20.97s/it][A

tensor(0.6829, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:10, 20.94s/it][A

tensor(0.6455, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:46, 20.90s/it][A

tensor(0.6399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:24, 20.88s/it][A

tensor(0.6710, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:09, 20.97s/it][A

tensor(0.6020, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:52, 21.03s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:32, 21.04s/it][A

tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:12, 21.05s/it][A

tensor(0.6620, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:15<23:52, 21.06s/it][A

tensor(0.6132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:28, 21.02s/it][A

tensor(0.6605, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:06, 21.00s/it][A

tensor(0.5774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:40, 20.94s/it][A

tensor(0.6502, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:16, 20.89s/it][A

tensor(0.6612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<21:55, 20.88s/it][A

tensor(0.6062, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:33, 20.86s/it][A

tensor(0.7050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:14, 20.89s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<20:54, 20.92s/it][A

tensor(0.6412, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:23<20:41, 21.04s/it][A

tensor(0.6428, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:44<20:16, 20.97s/it][A

loss: tensor(0.6727, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [09:05<19:53, 20.94s/it][A

loss: tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:25<19:29, 20.89s/it][A

loss: tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:46<19:08, 20.88s/it][A

loss: tensor(0.6714, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [10:07<18:51, 20.96s/it][A

loss: tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:28<18:30, 20.95s/it][A

loss: tensor(0.6611, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:49<18:11, 21.00s/it][A

tensor(0.6597, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:10<17:50, 20.98s/it][A

tensor(0.5920, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:29, 20.99s/it][A

tensor(0.6372, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:53<17:11, 21.05s/it][A

tensor(0.6847, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:49, 21.04s/it][A

tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:35<16:30, 21.08s/it][A

tensor(0.6189, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:56<16:09, 21.09s/it][A

tensor(0.6493, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:17<15:49, 21.09s/it][A

tensor(0.6017, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:38<15:27, 21.09s/it][A

tensor(0.7148, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<15:05, 21.06s/it][A

tensor(0.6745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<14:44, 21.06s/it][A

tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:41<14:21, 21.00s/it][A

tensor(0.6342, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:02<13:58, 20.96s/it][A

tensor(0.6203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:23<13:38, 20.98s/it][A

tensor(0.5993, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:44<13:17, 20.98s/it][A

tensor(0.6677, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:05<12:55, 20.96s/it][A

tensor(0.6564, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:35, 20.99s/it][A

tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:47<12:15, 21.02s/it][A

tensor(0.6364, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:08<11:53, 20.98s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:29<11:33, 21.00s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:50<11:14, 21.07s/it][A

tensor(0.6317, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:11<10:51, 21.02s/it][A

tensor(0.6430, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:32<10:30, 21.03s/it][A

tensor(0.6418, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:53<10:09, 21.02s/it][A

tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:14<09:48, 21.02s/it][A

tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:35<09:27, 21.02s/it][A

tensor(0.7002, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:56<09:07, 21.05s/it][A

tensor(0.6406, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:18<08:48, 21.14s/it][A

tensor(0.6637, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:39<08:27, 21.16s/it][A

tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:00<08:04, 21.08s/it][A

tensor(0.6162, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:21<07:42, 21.02s/it][A

tensor(0.6628, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:41<07:20, 20.98s/it][A

tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [22:02<06:58, 20.92s/it][A

loss: tensor(0.6364, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:23<06:36, 20.87s/it][A

loss: tensor(0.6259, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:44<06:14, 20.83s/it][A

tensor(0.7296, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:05<05:54, 20.85s/it][A

tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:25<05:33, 20.87s/it][A

tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:46<05:13, 20.87s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:07<04:51, 20.83s/it][A

tensor(0.6115, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:28<04:31, 20.86s/it][A

tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:49<04:10, 20.86s/it][A

tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:10<03:49, 20.88s/it][A

tensor(0.5917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:31<03:28, 20.85s/it][A

tensor(0.6930, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:51<03:07, 20.82s/it][A

tensor(0.6313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:12<02:46, 20.87s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:33<02:26, 20.86s/it][A

tensor(0.6087, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:54<02:05, 20.94s/it][A

tensor(0.6230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:15<01:44, 20.94s/it][A

tensor(0.6968, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:36<01:23, 20.94s/it][A

loss: tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:57<01:02, 20.93s/it][A

loss: tensor(0.6326, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [28:18<00:41, 20.96s/it][A

loss: tensor(0.6414, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:39<00:20, 21.00s/it][A

loss: tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:46<00:00, 20.81s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6717, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6436903677790998

	train acc: 0.6476074753559693

	training prec: 0.8402979531805855

	training rec: 0.6476074753559693

	training f1: 0.7070676957717215

	Current Learning rate:  3.7142857142857146e-06



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.62s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.62s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.63s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:15,  2.59s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.61s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.61s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.62s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6270743963264284

	Validation acc: 0.6939484126984127

	Validation prec: 0.8673740614505443

	Validation rec: 0.6939484126984127

	Validation f1: 0.745839839925626
loss: 


  1%|          | 1/83 [00:20<28:26, 20.82s/it][A

tensor(0.6349, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:06, 20.82s/it][A

tensor(0.6533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:47, 20.84s/it][A

tensor(0.6644, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:27, 20.86s/it][A

tensor(0.6370, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:07, 20.86s/it][A

tensor(0.6591, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:45, 20.85s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:22, 20.82s/it][A

tensor(0.6705, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:04, 20.85s/it][A

tensor(0.7058, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:40, 20.82s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:25, 20.89s/it][A

tensor(0.6854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:04, 20.90s/it][A

tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:49, 20.98s/it][A

tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:27, 20.96s/it][A

tensor(0.6377, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<24:06, 20.97s/it][A

tensor(0.5786, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:47, 21.00s/it][A

tensor(0.6954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:22, 20.93s/it][A

tensor(0.6306, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<22:58, 20.88s/it][A

tensor(0.6312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:15<22:34, 20.84s/it][A

tensor(0.6624, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:12, 20.82s/it][A

tensor(0.6076, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:57<21:51, 20.82s/it][A

tensor(0.6132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:18<21:31, 20.83s/it][A

tensor(0.6370, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:39<21:08, 20.80s/it][A

tensor(0.5884, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:59<20:48, 20.80s/it][A

tensor(0.6472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:20<20:26, 20.79s/it][A

tensor(0.5895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:41<20:07, 20.81s/it][A

tensor(0.6431, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:02<19:46, 20.82s/it][A

tensor(0.6720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:23<19:25, 20.81s/it][A

tensor(0.6691, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:44<19:05, 20.83s/it][A

tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:04<18:42, 20.79s/it][A

tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:25<18:23, 20.81s/it][A

tensor(0.6268, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:46<18:01, 20.80s/it][A

tensor(0.5926, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:07<17:38, 20.76s/it][A

tensor(0.7609, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:27<17:17, 20.75s/it][A

tensor(0.6749, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:48<16:57, 20.77s/it][A

tensor(0.6084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:09<16:36, 20.76s/it][A

tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:30<16:18, 20.82s/it][A

tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:51<15:58, 20.85s/it][A

tensor(0.6725, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:12<15:39, 20.88s/it][A

tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:33<15:18, 20.88s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:53<14:57, 20.88s/it][A

tensor(0.6153, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:14<14:39, 20.94s/it][A

tensor(0.6436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:36<14:19, 20.97s/it][A

tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:57<13:59, 20.98s/it][A

tensor(0.6255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:18<13:38, 20.98s/it][A

tensor(0.6867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:39<13:17, 20.99s/it][A

tensor(0.7217, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:00<12:58, 21.04s/it][A

tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:21<12:36, 21.00s/it][A

tensor(0.6412, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:42<12:16, 21.04s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:03<11:54, 21.00s/it][A

loss: tensor(0.6651, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:24<11:32, 21.00s/it][A

loss: tensor(0.7176, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:45<11:11, 20.98s/it][A

loss: tensor(0.6547, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:05<10:49, 20.95s/it][A

loss: tensor(0.6703, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:26<10:27, 20.91s/it][A

loss: tensor(0.6228, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:47<10:04, 20.86s/it][A

loss: tensor(0.6417, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:08<09:43, 20.82s/it][A

loss: tensor(0.6754, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:29<09:21, 20.81s/it][A

loss: tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:49<09:00, 20.80s/it][A

loss: tensor(0.6432, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:10<08:39, 20.78s/it][A

loss: tensor(0.6321, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:31<08:18, 20.76s/it][A

loss: tensor(0.6278, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:52<07:57, 20.78s/it][A

loss: tensor(0.5974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:12<07:36, 20.76s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:33<07:15, 20.75s/it][A

loss: tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:54<06:55, 20.79s/it][A

tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:15<06:34, 20.77s/it][A

tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:35<06:14, 20.79s/it][A

tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:56<05:53, 20.79s/it][A

tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:17<05:33, 20.85s/it][A

tensor(0.6077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:38<05:12, 20.86s/it][A

tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:59<04:51, 20.83s/it][A

tensor(0.6819, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:20<04:30, 20.81s/it][A

tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:40<04:09, 20.78s/it][A

tensor(0.6907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:01<03:48, 20.78s/it][A

tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:22<03:27, 20.77s/it][A

tensor(0.6597, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:43<03:06, 20.77s/it][A

tensor(0.6313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:03<02:46, 20.76s/it][A

tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:24<02:25, 20.77s/it][A

tensor(0.6815, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:45<02:05, 20.86s/it][A

tensor(0.6698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:06<01:44, 20.91s/it][A

tensor(0.6101, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:27<01:23, 20.90s/it][A

tensor(0.6579, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:48<01:02, 20.87s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:09<00:41, 20.88s/it][A

tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:30<00:20, 20.93s/it][A

tensor(0.6389, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:37<00:00, 20.69s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7018, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6460630053497223

	train acc: 0.6652176889375684

	training prec: 0.8433962979725284

	training rec: 0.6652176889375684

	training f1: 0.7221356202984668

	Current Learning rate:  3.428571428571429e-06



  2%|▏         | 1/42 [00:02<01:47,  2.62s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.62s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.62s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.61s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:34<01:15,  2.62s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:04,  2.59s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6279340769563403

	Validation acc: 0.6750992063492064

	Validation prec: 0.8656845192249187

	Validation rec: 0.6750992063492064

	Validation f1: 0.7314856142132072
loss: 


  1%|          | 1/83 [00:20<28:37, 20.95s/it][A

tensor(0.5938, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:08, 20.85s/it][A

tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:47, 20.85s/it][A

tensor(0.6533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:32, 20.91s/it][A

tensor(0.6666, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:12, 20.93s/it][A

tensor(0.6147, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:50, 20.92s/it][A

tensor(0.6982, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:31, 20.94s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:10, 20.95s/it][A

tensor(0.6615, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:50, 20.95s/it][A

tensor(0.6742, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:27, 20.92s/it][A

tensor(0.6266, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<25:02, 20.87s/it][A

tensor(0.6501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:41, 20.86s/it][A

tensor(0.6447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:21, 20.88s/it][A

tensor(0.6528, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<23:59, 20.86s/it][A

tensor(0.6930, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:37, 20.85s/it][A

tensor(0.6605, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:16, 20.84s/it][A

tensor(0.6206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<22:58, 20.88s/it][A

tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:38, 20.90s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:16, 20.89s/it][A

tensor(0.6394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:57<21:54, 20.87s/it][A

tensor(0.6440, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:18<21:31, 20.83s/it][A

tensor(0.7184, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:39<21:11, 20.85s/it][A

tensor(0.6083, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:00<20:49, 20.83s/it][A

tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:20<20:26, 20.79s/it][A

tensor(0.6315, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:41<20:07, 20.81s/it][A

tensor(0.6066, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:02<19:43, 20.77s/it][A

tensor(0.5606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:23<19:22, 20.75s/it][A

tensor(0.6653, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:43<19:02, 20.77s/it][A

tensor(0.6354, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:04<18:42, 20.78s/it][A

tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:25<18:23, 20.82s/it][A

tensor(0.6375, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:46<18:03, 20.83s/it][A

tensor(0.6607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:07<17:42, 20.83s/it][A

tensor(0.5949, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:28<17:22, 20.85s/it][A

tensor(0.6427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:49<17:01, 20.85s/it][A

tensor(0.6944, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:09<16:41, 20.86s/it][A

tensor(0.6033, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:30<16:20, 20.86s/it][A

tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:51<16:01, 20.91s/it][A

tensor(0.6545, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:13<15:45, 21.01s/it][A

tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:34<15:26, 21.05s/it][A

tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:55<15:07, 21.10s/it][A

tensor(0.6628, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:16<14:46, 21.10s/it][A

tensor(0.6197, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:37<14:26, 21.12s/it][A

tensor(0.5911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:58<14:05, 21.13s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:20<13:44, 21.14s/it][A

tensor(0.6529, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:41<13:22, 21.13s/it][A

tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:02<13:03, 21.18s/it][A

tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:23<12:40, 21.14s/it][A

tensor(0.6096, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:44<12:20, 21.16s/it][A

tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:05<11:58, 21.12s/it][A

tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:26<11:36, 21.10s/it][A

tensor(0.6404, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:48<11:16, 21.14s/it][A

tensor(0.6621, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:09<10:55, 21.14s/it][A

tensor(0.6495, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:30<10:33, 21.13s/it][A

tensor(0.7174, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:51<10:12, 21.13s/it][A

tensor(0.6357, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:12<09:51, 21.11s/it][A

tensor(0.6901, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:33<09:30, 21.14s/it][A

tensor(0.6690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:54<09:09, 21.13s/it][A

tensor(0.6710, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:15<08:47, 21.10s/it][A

tensor(0.7117, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:36<08:25, 21.08s/it][A

loss: tensor(0.6242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:57<08:03, 21.04s/it][A

tensor(0.6818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:18<07:43, 21.08s/it][A

tensor(0.5927, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:39<07:22, 21.05s/it][A

tensor(0.6133, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:00<07:00, 21.04s/it][A

tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:22<06:40, 21.08s/it][A

tensor(0.6964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:43<06:19, 21.09s/it][A

tensor(0.7479, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:04<05:58, 21.10s/it][A

tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:25<05:37, 21.09s/it][A

tensor(0.6027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:46<05:15, 21.04s/it][A

tensor(0.6682, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:07<04:53, 21.00s/it][A

tensor(0.6400, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:28<04:32, 20.94s/it][A

tensor(0.6492, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:48<04:11, 20.93s/it][A

tensor(0.6544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:09<03:50, 20.94s/it][A

tensor(0.6826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:31<03:30, 21.01s/it][A

tensor(0.6553, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:52<03:09, 21.04s/it][A

tensor(0.6415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:13<02:48, 21.03s/it][A

tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:34<02:27, 21.03s/it][A

tensor(0.6363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:55<02:06, 21.04s/it][A

tensor(0.6552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:16<01:45, 21.05s/it][A

tensor(0.6270, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:37<01:24, 21.06s/it][A

tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:58<01:03, 21.06s/it][A

tensor(0.7310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:19<00:42, 21.07s/it][A

tensor(0.6047, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:40<00:21, 21.11s/it][A

tensor(0.5786, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:48<00:00, 20.82s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5734, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6458706073014133

	train acc: 0.6478984118291347

	training prec: 0.8367760310542739

	training rec: 0.6478984118291347

	training f1: 0.7075838731875685

	Current Learning rate:  3.142857142857143e-06



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.68s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.66s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.65s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.65s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.66s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.66s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6262435487338475

	Validation acc: 0.6750992063492064

	Validation prec: 0.8705084781750055

	Validation rec: 0.6750992063492064

	Validation f1: 0.733393792274453
loss: 


  1%|          | 1/83 [00:21<29:09, 21.34s/it][A

tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:36, 21.20s/it][A

tensor(0.6416, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:15, 21.19s/it][A

tensor(0.6607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:52, 21.16s/it][A

tensor(0.6182, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:29, 21.15s/it][A

tensor(0.6377, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:06, 21.12s/it][A

tensor(0.6311, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:28<26:46, 21.14s/it][A

tensor(0.6493, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:49<26:23, 21.12s/it][A

tensor(0.6254, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<26:02, 21.11s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:31<25:41, 21.12s/it][A

tensor(0.6757, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:52<25:18, 21.09s/it][A

tensor(0.6904, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:13<24:55, 21.06s/it][A

tensor(0.6379, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:34<24:36, 21.09s/it][A

tensor(0.6283, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:55<24:13, 21.07s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:16<23:53, 21.08s/it][A

tensor(0.6647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:37<23:33, 21.10s/it][A

tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:58<23:12, 21.10s/it][A

tensor(0.6203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:20<22:51, 21.10s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:41<22:32, 21.13s/it][A

tensor(0.5980, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:02<22:13, 21.17s/it][A

tensor(0.6160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:23<21:53, 21.19s/it][A

tensor(0.6689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:44<21:31, 21.17s/it][A

tensor(0.6196, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:05<21:04, 21.07s/it][A

tensor(0.6515, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:26<20:43, 21.07s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:47<20:18, 21.01s/it][A

tensor(0.7117, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:08<19:54, 20.96s/it][A

tensor(0.6663, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:29<19:29, 20.89s/it][A

tensor(0.6196, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:50<19:07, 20.86s/it][A

tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:10<18:43, 20.81s/it][A

tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:31<18:23, 20.81s/it][A

tensor(0.6578, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:52<18:01, 20.80s/it][A

tensor(0.5765, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:13<17:44, 20.86s/it][A

tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:34<17:23, 20.87s/it][A

tensor(0.6703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:55<17:01, 20.85s/it][A

tensor(0.6352, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:15<16:41, 20.87s/it][A

tensor(0.6311, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:36<16:22, 20.91s/it][A

tensor(0.6406, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:57<16:01, 20.90s/it][A

tensor(0.6781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:18<15:42, 20.95s/it][A

tensor(0.5856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:39<15:19, 20.91s/it][A

tensor(0.6705, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:00<14:58, 20.90s/it][A

tensor(0.5885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:21<14:38, 20.90s/it][A

tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:42<14:16, 20.88s/it][A

tensor(0.6120, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:03<13:56, 20.91s/it][A

tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:24<13:34, 20.88s/it][A

tensor(0.6960, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:44<13:12, 20.85s/it][A

loss: tensor(0.6066, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [16:05<12:51, 20.84s/it][A

loss: tensor(0.6682, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:26<12:29, 20.83s/it][A

loss: tensor(0.6054, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:47<12:09, 20.85s/it][A

loss: tensor(0.6370, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:08<11:49, 20.85s/it][A

loss: tensor(0.6984, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:29<11:28, 20.85s/it][A

tensor(0.6345, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:49<11:07, 20.85s/it][A

loss: tensor(0.6560, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:10<10:45, 20.82s/it][A

loss: tensor(0.6320, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:31<10:24, 20.80s/it][A

loss: tensor(0.5711, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:52<10:02, 20.78s/it][A

loss: tensor(0.6186, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:12<09:41, 20.76s/it][A

loss: tensor(0.6296, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:33<09:20, 20.78s/it][A

loss: tensor(0.6566, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:54<09:01, 20.81s/it][A

loss: tensor(0.6958, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:15<08:39, 20.78s/it][A

tensor(0.6185, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:35<08:17, 20.75s/it][A

loss: tensor(0.6406, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:56<07:57, 20.74s/it][A

tensor(0.6140, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:17<07:37, 20.80s/it][A

tensor(0.6232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:38<07:16, 20.78s/it][A

tensor(0.6128, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:59<06:56, 20.81s/it][A

tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:20<06:36, 20.84s/it][A

tensor(0.6394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:41<06:15, 20.85s/it][A

tensor(0.6206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:01<05:54, 20.86s/it][A

tensor(0.6554, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:23<05:34, 20.92s/it][A

tensor(0.6222, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:43<05:13, 20.89s/it][A

tensor(0.6380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:04<04:53, 20.93s/it][A

tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:25<04:32, 20.98s/it][A

tensor(0.6120, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:46<04:11, 20.99s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:07<03:50, 20.99s/it][A

tensor(0.6191, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:28<03:29, 20.99s/it][A

tensor(0.5916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:49<03:08, 20.97s/it][A

tensor(0.6975, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:10<02:47, 20.94s/it][A

tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:31<02:26, 20.90s/it][A

tensor(0.6581, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:52<02:05, 20.84s/it][A

tensor(0.6311, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:13<01:44, 20.84s/it][A

tensor(0.6538, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:33<01:23, 20.83s/it][A

tensor(0.5988, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:55<01:02, 20.91s/it][A

tensor(0.6531, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:16<00:41, 20.95s/it][A

tensor(0.6541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:37<00:21, 21.01s/it][A

tensor(0.6817, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:44<00:00, 20.78s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7333, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6403745527727058

	train acc: 0.6480182092004382

	training prec: 0.8454377376189062

	training rec: 0.6480182092004382

	training f1: 0.7084997116983204

	Current Learning rate:  2.8571428571428573e-06



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.65s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.66s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.67s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.66s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.66s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.65s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.66s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.64s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6245451087043399

	Validation acc: 0.6800595238095238

	Validation prec: 0.869105833740114

	Validation rec: 0.6800595238095238

	Validation f1: 0.7371636184205467
loss: 


  1%|          | 1/83 [00:20<28:36, 20.93s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:13, 20.90s/it][A

tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:53, 20.92s/it][A

tensor(0.5999, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:30, 20.89s/it][A

tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:08, 20.88s/it][A

tensor(0.6722, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:51, 20.92s/it][A

tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:29, 20.92s/it][A

tensor(0.6128, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:07, 20.90s/it][A

tensor(0.6450, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:48, 20.93s/it][A

tensor(0.6312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:28, 20.93s/it][A

tensor(0.6546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:09, 20.96s/it][A

tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:47, 20.95s/it][A

tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:23, 20.91s/it][A

tensor(0.7191, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<24:04, 20.94s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:45, 20.97s/it][A

tensor(0.6558, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:23, 20.95s/it][A

tensor(0.6257, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<23:01, 20.94s/it][A

tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:42, 20.96s/it][A

tensor(0.6124, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:20, 20.94s/it][A

tensor(0.6352, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<21:58, 20.93s/it][A

tensor(0.6564, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:19<21:38, 20.94s/it][A

tensor(0.6830, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:40<21:16, 20.92s/it][A

tensor(0.6683, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:01<20:58, 20.97s/it][A

tensor(0.6648, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:38, 21.00s/it][A

tensor(0.6173, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:43<20:18, 21.00s/it][A

tensor(0.6484, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<19:55, 20.98s/it][A

tensor(0.6187, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:33, 20.96s/it][A

tensor(0.6411, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:14, 20.99s/it][A

tensor(0.6328, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<18:55, 21.03s/it][A

tensor(0.6220, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:28<18:35, 21.04s/it][A

tensor(0.6217, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:49<18:13, 21.03s/it][A

tensor(0.6131, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:10<17:51, 21.01s/it][A

tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:31<17:31, 21.03s/it][A

tensor(0.6524, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:52<17:11, 21.05s/it][A

tensor(0.6472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:52, 21.09s/it][A

tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:34<16:28, 21.04s/it][A

loss: tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:55<16:04, 20.97s/it][A

loss: tensor(0.6923, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:16<15:39, 20.87s/it][A

loss: tensor(0.6542, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:37<15:16, 20.83s/it][A

loss: tensor(0.6308, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:57<14:54, 20.80s/it][A

loss: tensor(0.6631, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:18<14:32, 20.79s/it][A

loss: tensor(0.6359, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:39<14:12, 20.79s/it][A

loss: tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [15:00<13:49, 20.73s/it][A

loss: tensor(0.6202, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:20<13:26, 20.68s/it][A

loss: tensor(0.6072, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:41<13:06, 20.69s/it][A

loss: tensor(0.6683, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [16:01<12:45, 20.70s/it][A

loss: tensor(0.6196, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:22<12:24, 20.68s/it][A

loss: tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:43<12:03, 20.66s/it][A

loss: tensor(0.6600, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:03<11:41, 20.65s/it][A

loss: tensor(0.6131, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:24<11:21, 20.66s/it][A

loss: tensor(0.7382, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:45<11:00, 20.65s/it][A

loss: tensor(0.6755, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:06<10:41, 20.71s/it][A

loss: tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:26<10:21, 20.70s/it][A

loss: tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:47<10:01, 20.73s/it][A

tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:08<09:40, 20.73s/it][A

tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:29<09:20, 20.75s/it][A

loss: tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:49<08:59, 20.74s/it][A

loss: tensor(0.6670, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:10<08:38, 20.75s/it][A

tensor(0.6774, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:31<08:16, 20.71s/it][A

tensor(0.6448, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6246, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:51<07:55, 20.68s/it][A
 73%|███████▎  | 61/83 [21:12<07:35, 20.72s/it][A

loss: tensor(0.6485, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:33<07:15, 20.74s/it][A

tensor(0.6589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:54<06:55, 20.76s/it][A

tensor(0.6296, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:14<06:34, 20.77s/it][A

tensor(0.5740, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:35<06:14, 20.79s/it][A

tensor(0.6256, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:56<05:53, 20.78s/it][A

tensor(0.6951, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:17<05:32, 20.79s/it][A

tensor(0.5913, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:38<05:11, 20.79s/it][A

tensor(0.6619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:58<04:51, 20.79s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:19<04:30, 20.82s/it][A

tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:40<04:09, 20.82s/it][A

tensor(0.6692, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:01<03:49, 20.82s/it][A

tensor(0.6739, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:22<03:28, 20.83s/it][A

tensor(0.6620, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:43<03:07, 20.82s/it][A

tensor(0.6501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:04<02:46, 20.84s/it][A

tensor(0.6971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:24<02:26, 20.86s/it][A

tensor(0.6660, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:45<02:05, 20.84s/it][A

tensor(0.6007, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:06<01:44, 20.83s/it][A

tensor(0.6460, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:27<01:23, 20.82s/it][A

tensor(0.5833, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:48<01:02, 20.79s/it][A

tensor(0.6556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:08<00:41, 20.76s/it][A

tensor(0.6332, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:29<00:20, 20.76s/it][A

tensor(0.6379, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:36<00:00, 20.68s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6098, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6438691292900637

	train acc: 0.6629415388828039

	training prec: 0.8366295002748201

	training rec: 0.6629415388828039

	training f1: 0.720077247806642

	Current Learning rate:  2.571428571428571e-06



  2%|▏         | 1/42 [00:02<01:48,  2.65s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:20<01:29,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.63s/it][A
 40%|████      | 17/42 [00:44<01:04,  2.60s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6243358211857932

	Validation acc: 0.6765873015873016

	Validation prec: 0.8652329759061679

	Validation rec: 0.6765873015873016

	Validation f1: 0.7320896749988838
loss: 


  1%|          | 1/83 [00:20<28:34, 20.91s/it][A

tensor(0.6164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:21, 21.01s/it][A

tensor(0.6593, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:59, 21.00s/it][A

tensor(0.6327, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:42, 21.04s/it][A

tensor(0.7015, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:19, 21.02s/it][A

tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<26:58, 21.02s/it][A

tensor(0.6619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:34, 20.97s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:09, 20.92s/it][A

tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:44, 20.86s/it][A

tensor(0.6233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:21, 20.84s/it][A

tensor(0.6654, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<24:59, 20.83s/it][A

tensor(0.6512, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:37, 20.81s/it][A

tensor(0.6811, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:18, 20.83s/it][A

tensor(0.6432, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<23:58, 20.85s/it][A

tensor(0.6612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:40, 20.89s/it][A

tensor(0.6785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:21, 20.91s/it][A

tensor(0.6352, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<23:00, 20.92s/it][A

tensor(0.6762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:39, 20.92s/it][A

tensor(0.6300, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:19, 20.93s/it][A

tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<21:58, 20.93s/it][A

tensor(0.5988, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:19<21:35, 20.90s/it][A

tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:40<21:15, 20.91s/it][A

tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:01<20:56, 20.95s/it][A

tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:35, 20.95s/it][A

tensor(0.6436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:43<20:15, 20.96s/it][A

tensor(0.5915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<19:55, 20.98s/it][A

tensor(0.6354, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:34, 20.98s/it][A

tensor(0.6465, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:13, 20.96s/it][A

tensor(0.6259, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<18:53, 20.99s/it][A

tensor(0.6560, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:27<18:31, 20.97s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:48<18:08, 20.93s/it][A

tensor(0.6518, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:09<17:44, 20.88s/it][A

tensor(0.6674, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:30<17:21, 20.84s/it][A

tensor(0.6417, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:51<17:00, 20.83s/it][A

tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:11<16:40, 20.84s/it][A

tensor(0.6057, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:32<16:20, 20.86s/it][A

tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:53<15:59, 20.86s/it][A

tensor(0.6249, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:14<15:40, 20.90s/it][A

tensor(0.6268, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:35<15:20, 20.92s/it][A

tensor(0.6755, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:56<15:00, 20.93s/it][A

tensor(0.5907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:17<14:38, 20.91s/it][A

tensor(0.6450, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:38<14:15, 20.86s/it][A

tensor(0.5999, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:59<13:53, 20.83s/it][A

tensor(0.6808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:19<13:31, 20.81s/it][A

tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:40<13:11, 20.82s/it][A

tensor(0.7102, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:01<12:51, 20.84s/it][A

tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:22<12:30, 20.86s/it][A

tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:43<12:08, 20.83s/it][A

tensor(0.6219, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:04<11:49, 20.85s/it][A

tensor(0.6415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:25<11:29, 20.89s/it][A

tensor(0.6790, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:46<11:09, 20.91s/it][A

tensor(0.6654, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:06<10:47, 20.89s/it][A

tensor(0.6683, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:27<10:26, 20.87s/it][A

tensor(0.6595, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:48<10:04, 20.85s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:09<09:42, 20.82s/it][A

tensor(0.6948, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:30<09:21, 20.80s/it][A

loss: tensor(0.6804, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:50<08:59, 20.74s/it][A

loss: tensor(0.6305, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:11<08:38, 20.73s/it][A

tensor(0.6169, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:31<08:16, 20.69s/it][A

tensor(0.6146, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:52<07:55, 20.66s/it][A

tensor(0.6428, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:13<07:35, 20.70s/it][A

tensor(0.6517, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:33<07:14, 20.68s/it][A

loss: tensor(0.6651, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:54<06:53, 20.68s/it][A

loss: tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:15<06:33, 20.73s/it][A

loss: tensor(0.7286, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:36<06:14, 20.78s/it][A

loss: tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [22:57<05:53, 20.81s/it][A

loss: tensor(0.6619, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:18<05:34, 20.88s/it][A

loss: tensor(0.5876, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:39<05:14, 20.96s/it][A

tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:00<04:54, 21.04s/it][A

tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:21<04:33, 21.05s/it][A

tensor(0.6615, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:42<04:12, 21.01s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:03<03:51, 21.02s/it][A

tensor(0.6406, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:24<03:29, 20.97s/it][A

tensor(0.6887, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:45<03:08, 20.93s/it][A

tensor(0.6609, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:06<02:47, 20.94s/it][A

tensor(0.6277, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:27<02:26, 21.00s/it][A

tensor(0.6637, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:48<02:06, 21.02s/it][A

tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:09<01:45, 21.04s/it][A

tensor(0.6355, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:30<01:24, 21.07s/it][A

tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:51<01:03, 21.05s/it][A

tensor(0.6585, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:12<00:42, 21.06s/it][A

tensor(0.6076, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:33<00:21, 21.06s/it][A

tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:41<00:00, 20.74s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5642, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6453486515814999

	train acc: 0.6397521905805038

	training prec: 0.8361153044111205

	training rec: 0.6397521905805038

	training f1: 0.7016631174697143

	Current Learning rate:  2.285714285714286e-06



  2%|▏         | 1/42 [00:02<01:49,  2.66s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.69s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.66s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.64s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.65s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.64s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.6251652042071024

	Validation acc: 0.6750992063492064

	Validation prec: 0.8683578794164889

	Validation rec: 0.6750992063492064

	Validation f1: 0.7335077956091357



  1%|          | 1/83 [00:20<28:21, 20.75s/it][A

loss: tensor(0.6308, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:41<28:01, 20.76s/it][A

loss: tensor(0.6041, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:02<27:45, 20.82s/it][A

loss: tensor(0.6822, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:23<27:27, 20.85s/it][A

loss: tensor(0.6511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:11, 20.91s/it][A

tensor(0.6958, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:49, 20.90s/it][A

tensor(0.7259, device='cuda:0', grad_fn=<NllLossBackward>)



  8%|▊         | 7/83 [02:26<26:27, 20.89s/it][A

loss: tensor(0.6577, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:46<26:04, 20.86s/it][A

loss: tensor(0.6750, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:07<25:44, 20.87s/it][A

loss: tensor(0.6591, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:28<25:23, 20.87s/it][A

loss: tensor(0.5773, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:49<25:06, 20.93s/it][A

loss: tensor(0.6946, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:10<24:43, 20.90s/it][A

loss: tensor(0.6610, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:31<24:21, 20.87s/it][A

loss: tensor(0.6667, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:52<23:57, 20.83s/it][A

loss: tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:12<23:37, 20.85s/it][A

loss: tensor(0.6137, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:33<23:16, 20.85s/it][A

loss: tensor(0.6119, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:54<22:53, 20.81s/it][A

loss: tensor(0.6428, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:15<22:30, 20.77s/it][A

loss: tensor(0.6229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:09, 20.78s/it][A

tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:56<21:50, 20.80s/it][A

tensor(0.6370, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:17<21:26, 20.75s/it][A

tensor(0.6701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:37<20:52, 20.53s/it][A

tensor(0.6248, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:57<20:26, 20.45s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:18<20:17, 20.64s/it][A

tensor(0.6656, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:39<20:01, 20.71s/it][A

tensor(0.6693, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:00<19:45, 20.80s/it][A

tensor(0.6229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:21<19:26, 20.82s/it][A

tensor(0.6645, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:42<19:08, 20.89s/it][A

tensor(0.6230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:03<18:48, 20.90s/it][A

tensor(0.6395, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:24<18:27, 20.90s/it][A

tensor(0.6003, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:45<18:08, 20.93s/it][A

tensor(0.6274, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:06<17:52, 21.03s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:27<17:29, 20.99s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:48<17:08, 20.99s/it][A

tensor(0.6067, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:09<16:46, 20.96s/it][A

tensor(0.6563, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:30<16:26, 20.99s/it][A

tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:51<16:00, 20.88s/it][A

tensor(0.6041, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:12<15:39, 20.89s/it][A

tensor(0.6087, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:32<15:17, 20.85s/it][A

tensor(0.7041, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:53<14:58, 20.90s/it][A

loss: tensor(0.6370, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:14<14:36, 20.88s/it][A

loss: tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:35<14:13, 20.83s/it][A

loss: tensor(0.6291, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:56<13:54, 20.85s/it][A

loss: tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:17<13:33, 20.85s/it][A

loss: tensor(0.6841, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:37<13:09, 20.77s/it][A

loss: tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:58<12:47, 20.73s/it][A

loss: tensor(0.6465, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:19<12:26, 20.74s/it][A

loss: tensor(0.6735, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:39<12:05, 20.74s/it][A

loss: tensor(0.6283, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:00<11:45, 20.74s/it][A

loss: tensor(0.6728, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:21<11:24, 20.73s/it][A

loss: tensor(0.5871, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:42<11:03, 20.73s/it][A

loss: tensor(0.7025, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:02<10:41, 20.71s/it][A

loss: tensor(0.6073, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:23<10:21, 20.70s/it][A

loss: tensor(0.6813, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:44<10:00, 20.70s/it][A

loss: tensor(0.7256, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:04<09:40, 20.72s/it][A

loss: tensor(0.6234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:25<09:18, 20.70s/it][A

tensor(0.6636, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:46<08:58, 20.71s/it][A

tensor(0.6811, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:06<08:37, 20.69s/it][A

tensor(0.6647, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:27<08:15, 20.66s/it][A

loss: tensor(0.6460, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:48<07:54, 20.63s/it][A

loss: tensor(0.6609, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:08<07:34, 20.65s/it][A

tensor(0.6978, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:29<07:14, 20.69s/it][A

tensor(0.6848, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:50<06:54, 20.74s/it][A

tensor(0.6754, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:11<06:36, 20.85s/it][A

tensor(0.6843, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:32<06:16, 20.89s/it][A

tensor(0.7316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:53<05:55, 20.89s/it][A

tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:14<05:35, 20.97s/it][A

tensor(0.6025, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:35<05:14, 20.98s/it][A

tensor(0.6974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:56<04:53, 21.00s/it][A

tensor(0.6194, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:17<04:33, 21.02s/it][A

tensor(0.6937, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:38<04:11, 20.96s/it][A

tensor(0.6415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:59<03:50, 20.96s/it][A

tensor(0.6761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:20<03:29, 20.90s/it][A

tensor(0.5899, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:40<03:07, 20.86s/it][A

tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:01<02:46, 20.84s/it][A

tensor(0.6414, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:22<02:25, 20.85s/it][A

tensor(0.6102, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:43<02:05, 20.85s/it][A

tensor(0.5879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:04<01:44, 20.84s/it][A

tensor(0.6132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:25<01:23, 20.85s/it][A

tensor(0.6126, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:46<01:02, 20.85s/it][A

tensor(0.5841, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:06<00:41, 20.87s/it][A

tensor(0.6350, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:27<00:20, 20.87s/it][A

tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:34<00:00, 20.66s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.8050, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6488869735993534

	train acc: 0.6515436746987951

	training prec: 0.8364140533444662

	training rec: 0.6515436746987951

	training f1: 0.7106611764913645

	Current Learning rate:  2.0000000000000003e-06



  2%|▏         | 1/42 [00:02<01:45,  2.56s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.60s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.60s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.61s/it][A
 19%|█▉        | 8/42 [00:20<01:29,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.61s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.60s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.61s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.60s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:46<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.60s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.624702132883526

	Validation acc: 0.6773313492063492

	Validation prec: 0.8623679533875358

	Validation rec: 0.6773313492063492

	Validation f1: 0.733909045471089
loss: 


  1%|          | 1/83 [00:20<28:39, 20.97s/it][A

tensor(0.6554, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:14, 20.92s/it][A

tensor(0.6695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:02, 21.04s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:39, 21.01s/it][A

tensor(0.6263, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:19, 21.02s/it][A

tensor(0.6162, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:56, 20.99s/it][A

tensor(0.6637, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:33, 20.97s/it][A

tensor(0.6075, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:09, 20.93s/it][A

tensor(0.6842, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:48, 20.92s/it][A

tensor(0.6486, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:30, 20.97s/it][A

tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:11, 20.99s/it][A

tensor(0.7027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:50, 20.99s/it][A

tensor(0.6628, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:28, 20.97s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:05, 20.94s/it][A

tensor(0.5820, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:44, 20.95s/it][A

tensor(0.6463, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:22, 20.93s/it][A

tensor(0.5993, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:01, 20.93s/it][A

tensor(0.6339, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:37, 20.88s/it][A

tensor(0.6484, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:14, 20.85s/it][A

tensor(0.6593, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<21:54, 20.86s/it][A

tensor(0.6610, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:46, 21.07s/it][A

tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:32, 21.19s/it][A

tensor(0.6519, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:04<21:40, 21.67s/it][A

tensor(0.6456, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:26<21:20, 21.70s/it][A

tensor(0.6579, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:48<21:01, 21.75s/it][A

tensor(0.6441, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:10<20:42, 21.79s/it][A

tensor(0.5840, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:32<20:23, 21.85s/it][A

tensor(0.6428, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:54<20:05, 21.91s/it][A

tensor(0.6688, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:17<20:01, 22.24s/it][A

tensor(0.6816, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:39<19:47, 22.41s/it][A

tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:01<19:15, 22.23s/it][A

tensor(0.6616, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:23<18:49, 22.16s/it][A

tensor(0.6131, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:45<18:22, 22.04s/it][A

tensor(0.5725, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:07<18:03, 22.10s/it][A

tensor(0.5850, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:29<17:32, 21.94s/it][A

tensor(0.6808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:51<17:12, 21.96s/it][A

tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:13<16:52, 22.01s/it][A

tensor(0.6127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:35<16:28, 21.96s/it][A

tensor(0.6278, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:57<16:03, 21.90s/it][A

tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:18<15:39, 21.84s/it][A

tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:40<15:14, 21.78s/it][A

tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:02<14:59, 21.94s/it][A

tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:24<14:39, 21.98s/it][A

tensor(0.6372, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:46<14:17, 22.00s/it][A

tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:08<13:54, 21.95s/it][A

tensor(0.6851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:31<13:38, 22.12s/it][A

tensor(0.6708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:53<13:23, 22.32s/it][A

tensor(0.6897, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:15<12:55, 22.15s/it][A

tensor(0.6071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:37<12:26, 21.95s/it][A

tensor(0.6262, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:58<11:58, 21.76s/it][A

tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:20<11:34, 21.69s/it][A

tensor(0.6298, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:41<11:10, 21.63s/it][A

tensor(0.6242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:03<10:49, 21.64s/it][A

tensor(0.6396, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:25<10:30, 21.74s/it][A

tensor(0.6654, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:47<10:10, 21.81s/it][A

tensor(0.7085, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:10<10:03, 22.37s/it][A

tensor(0.6431, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:32<09:39, 22.27s/it][A

tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:54<09:11, 22.05s/it][A

loss: tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [21:16<08:49, 22.05s/it][A

loss: tensor(0.6611, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [21:38<08:23, 21.91s/it][A

loss: tensor(0.6454, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [22:00<08:06, 22.11s/it][A

tensor(0.6789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:23<07:50, 22.43s/it][A

tensor(0.6365, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:46<07:29, 22.50s/it][A

tensor(0.6584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:08<07:04, 22.36s/it][A

tensor(0.6250, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:31<06:44, 22.50s/it][A

tensor(0.6014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:54<06:24, 22.64s/it][A

tensor(0.5952, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [24:15<05:57, 22.33s/it][A

tensor(0.6206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:37<05:30, 22.04s/it][A

tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:59<05:07, 21.98s/it][A

tensor(0.5899, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:21<04:46, 22.03s/it][A

tensor(0.6596, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:42<04:22, 21.90s/it][A

tensor(0.6627, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [26:04<04:00, 21.84s/it][A

tensor(0.6324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:27<03:40, 22.04s/it][A

tensor(0.6324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:49<03:18, 22.08s/it][A

tensor(0.6286, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [27:10<02:54, 21.79s/it][A

tensor(0.6061, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:31<02:31, 21.61s/it][A

tensor(0.6392, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:52<02:08, 21.36s/it][A

tensor(0.6732, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [28:13<01:46, 21.24s/it][A

tensor(0.6921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:34<01:24, 21.13s/it][A

tensor(0.6509, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:54<01:03, 21.00s/it][A

tensor(0.6638, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:15<00:42, 21.04s/it][A

tensor(0.6810, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:37<00:21, 21.08s/it][A

tensor(0.6159, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:44<00:00, 21.50s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6357, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6432078136018959

	train acc: 0.6547097480832421

	training prec: 0.8419488488156235

	training rec: 0.6547097480832421

	training f1: 0.7148595203935637

	Current Learning rate:  1.7142857142857145e-06



  2%|▏         | 1/42 [00:02<01:50,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.67s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.63s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.64s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.65s/it][A
 33%|███▎      | 14/42 [00:37<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.65s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.65s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.6206898944718497

	Validation acc: 0.6830357142857143

	Validation prec: 0.8725476995796849

	Validation rec: 0.6830357142857143

	Validation f1: 0.7397742349306237
loss: 


  1%|          | 1/83 [00:21<28:55, 21.16s/it][A

tensor(0.6363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:25, 21.06s/it][A

tensor(0.6458, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:08, 21.11s/it][A

tensor(0.6246, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:47, 21.10s/it][A

tensor(0.6800, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:27, 21.13s/it][A

tensor(0.5677, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<27:07, 21.14s/it][A

tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:28<26:49, 21.18s/it][A

tensor(0.6215, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:49<26:25, 21.14s/it][A

tensor(0.5857, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:10<26:03, 21.13s/it][A

tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:31<25:40, 21.11s/it][A

tensor(0.7045, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:52<25:19, 21.11s/it][A

tensor(0.6295, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:13<25:00, 21.13s/it][A

tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:34<24:39, 21.14s/it][A

tensor(0.6407, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:55<24:18, 21.14s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:16<23:56, 21.12s/it][A

tensor(0.6273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:37<23:32, 21.09s/it][A

tensor(0.6974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:58<23:11, 21.08s/it][A

tensor(0.6349, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:20<22:49, 21.07s/it][A

tensor(0.6262, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:41<22:28, 21.07s/it][A

tensor(0.6965, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:02<22:09, 21.10s/it][A

tensor(0.6890, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:23<21:48, 21.11s/it][A

tensor(0.6479, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:44<21:27, 21.11s/it][A

tensor(0.6392, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:05<21:03, 21.05s/it][A

tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:26<20:39, 21.00s/it][A

tensor(0.6414, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:47<20:18, 21.00s/it][A

tensor(0.6427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:08<19:57, 21.00s/it][A

tensor(0.6859, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:29<19:35, 20.99s/it][A

tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:50<19:14, 20.99s/it][A

tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [10:11<18:50, 20.94s/it][A

loss: tensor(0.6591, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:32<18:29, 20.94s/it][A

loss: tensor(0.6283, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:53<18:09, 20.96s/it][A

loss: tensor(0.6136, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:13<17:47, 20.92s/it][A

loss: tensor(0.5824, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:34<17:26, 20.92s/it][A

loss: tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:55<17:03, 20.89s/it][A

loss: tensor(0.6136, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:16<16:42, 20.88s/it][A

loss: tensor(0.6258, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:37<16:22, 20.90s/it][A

tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:58<16:03, 20.94s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:19<15:43, 20.96s/it][A

tensor(0.6911, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:40<15:19, 20.90s/it][A

tensor(0.5513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:00<14:57, 20.87s/it][A

tensor(0.6644, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:21<14:36, 20.86s/it][A

tensor(0.6241, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:42<14:16, 20.90s/it][A

tensor(0.6139, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:03<13:57, 20.93s/it][A

tensor(0.5858, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:24<13:37, 20.96s/it][A

tensor(0.6383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:45<13:13, 20.88s/it][A

tensor(0.6957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:06<12:52, 20.87s/it][A

tensor(0.5880, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:29<13:00, 21.68s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:53<12:54, 22.13s/it][A

tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:16<12:40, 22.38s/it][A

tensor(0.6356, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:38<12:18, 22.38s/it][A

tensor(0.6884, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:59<11:43, 21.98s/it][A

tensor(0.6457, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:20<11:15, 21.81s/it][A

tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:43<10:58, 21.96s/it][A

tensor(0.6864, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:06<10:43, 22.20s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:28<10:23, 22.26s/it][A

tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:50<09:58, 22.16s/it][A

loss: tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [20:13<09:43, 22.44s/it][A

loss: tensor(0.6553, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:35<09:15, 22.22s/it][A

loss: tensor(0.6607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:56<08:47, 22.00s/it][A

tensor(0.6206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:18<08:23, 21.89s/it][A

tensor(0.5785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:39<07:59, 21.79s/it][A

tensor(0.6004, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:00<07:32, 21.56s/it][A

tensor(0.6755, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:21<07:07, 21.39s/it][A

tensor(0.6415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:42<06:44, 21.30s/it][A

tensor(0.7384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:03<06:22, 21.23s/it][A

tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:25<06:00, 21.19s/it][A

tensor(0.6311, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:46<05:38, 21.15s/it][A

tensor(0.6073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:07<05:16, 21.07s/it][A

tensor(0.6262, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:28<04:54, 21.06s/it][A

tensor(0.6093, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:48<04:33, 21.02s/it][A

tensor(0.6571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:09<04:11, 20.97s/it][A

tensor(0.6834, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:30<03:51, 21.00s/it][A

tensor(0.6571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:51<03:29, 20.97s/it][A

tensor(0.6136, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:12<03:08, 20.97s/it][A

tensor(0.7123, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:33<02:47, 20.96s/it][A

tensor(0.5954, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:54<02:26, 20.96s/it][A

tensor(0.6723, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:15<02:05, 20.94s/it][A

tensor(0.6694, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:36<01:44, 20.96s/it][A

tensor(0.6177, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:57<01:23, 20.94s/it][A

tensor(0.5801, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:18<01:02, 20.95s/it][A

tensor(0.5926, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:39<00:41, 20.94s/it][A

tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:00<00:20, 20.93s/it][A

tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:07<00:00, 21.05s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6707, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6407921594309519

	train acc: 0.6582523274917853

	training prec: 0.8399962477733539

	training rec: 0.6582523274917853

	training f1: 0.7161548159566921

	Current Learning rate:  1.4285714285714286e-06



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.67s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.63s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.64s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.6229302187760671

	Validation acc: 0.675843253968254

	Validation prec: 0.8661213626023874

	Validation rec: 0.675843253968254

	Validation f1: 0.7312023852445877
loss: 


  1%|          | 1/83 [00:20<28:38, 20.96s/it][A

tensor(0.6633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:25, 21.05s/it][A

tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:02, 21.03s/it][A

tensor(0.6566, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:41, 21.04s/it][A

tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:19, 21.02s/it][A

tensor(0.5902, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:54, 20.96s/it][A

tensor(0.6679, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:32, 20.95s/it][A

tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:11, 20.95s/it][A

tensor(0.6472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:51, 20.97s/it][A

tensor(0.6401, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:29, 20.95s/it][A

tensor(0.7031, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:08, 20.95s/it][A

tensor(0.6665, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:44, 20.91s/it][A

tensor(0.6430, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:23, 20.91s/it][A

tensor(0.6509, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:03, 20.92s/it][A

tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:44, 20.95s/it][A

tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:22, 20.93s/it][A

tensor(0.6079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:00, 20.92s/it][A

tensor(0.5958, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:37, 20.89s/it][A

tensor(0.6250, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:18, 20.91s/it][A

tensor(0.6692, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<22:02, 20.99s/it][A

tensor(0.6050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:43, 21.03s/it][A

tensor(0.6339, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:25, 21.08s/it][A

tensor(0.6820, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<21:04, 21.07s/it][A

tensor(0.6790, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:23<20:40, 21.03s/it][A

tensor(0.6688, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:44<20:18, 21.00s/it][A

tensor(0.7034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:05<19:56, 20.99s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:26<19:40, 21.08s/it][A

tensor(0.6869, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:47<19:19, 21.08s/it][A

loss: tensor(0.6998, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [10:08<18:56, 21.04s/it][A

loss: tensor(0.6229, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:29<18:33, 21.01s/it][A

loss: tensor(0.6625, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:50<18:11, 20.99s/it][A

loss: tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:11<17:47, 20.94s/it][A

loss: tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:32<17:28, 20.96s/it][A

loss: tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:53<17:07, 20.98s/it][A

tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:44, 20.92s/it][A

tensor(0.6696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:34<16:21, 20.88s/it][A

tensor(0.5778, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:55<15:59, 20.85s/it][A

tensor(0.6852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:16<15:37, 20.84s/it][A

tensor(0.6970, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:15, 20.81s/it][A

tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:58<14:55, 20.82s/it][A

tensor(0.6066, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:18<14:34, 20.83s/it][A

tensor(0.6404, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:39<14:15, 20.86s/it][A

tensor(0.6681, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:00<13:53, 20.85s/it][A

tensor(0.6434, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:21<13:33, 20.87s/it][A

tensor(0.5924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:42<13:14, 20.91s/it][A

tensor(0.6596, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:03<12:53, 20.90s/it][A

tensor(0.6009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:24<12:33, 20.92s/it][A

tensor(0.6501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:45<12:11, 20.91s/it][A

tensor(0.6193, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:06<11:50, 20.91s/it][A

tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:27<11:30, 20.93s/it][A

tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:48<11:11, 20.99s/it][A

tensor(0.6312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:09<10:51, 21.02s/it][A

tensor(0.6358, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:30<10:29, 20.97s/it][A

tensor(0.6285, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:51<10:08, 20.99s/it][A

tensor(0.6597, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:12<09:45, 20.92s/it][A

tensor(0.6244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:33<09:24, 20.90s/it][A

tensor(0.6215, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:53<09:02, 20.85s/it][A

loss: tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:14<08:40, 20.82s/it][A

loss: tensor(0.6251, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:35<08:20, 20.83s/it][A

tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:56<07:59, 20.85s/it][A

tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:17<07:38, 20.84s/it][A

tensor(0.6908, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:37<07:17, 20.84s/it][A

tensor(0.7157, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:58<06:57, 20.86s/it][A

tensor(0.6073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:19<06:36, 20.88s/it][A

tensor(0.6724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:40<06:15, 20.85s/it][A

tensor(0.6159, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:01<05:55, 20.91s/it][A

tensor(0.6229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:22<05:35, 20.96s/it][A

tensor(0.6794, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:43<05:14, 20.99s/it][A

tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:04<04:53, 20.97s/it][A

tensor(0.6341, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:25<04:32, 20.92s/it][A

tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:46<04:11, 20.93s/it][A

tensor(0.6485, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:07<03:50, 20.94s/it][A

tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:28<03:29, 20.99s/it][A

tensor(0.6899, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:49<03:08, 20.95s/it][A

tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:10<02:47, 20.92s/it][A

tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:31<02:26, 20.90s/it][A

tensor(0.6797, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:51<02:05, 20.88s/it][A

tensor(0.6540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:12<01:44, 20.87s/it][A

tensor(0.5918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:33<01:23, 20.83s/it][A

tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:54<01:02, 20.82s/it][A

tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:15<00:41, 20.84s/it][A

tensor(0.5936, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:36<00:20, 20.90s/it][A

tensor(0.6937, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:43<00:00, 20.76s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6174, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6446837739772108

	train acc: 0.6528101040525739

	training prec: 0.8384409162812905

	training rec: 0.6528101040525739

	training f1: 0.7127101918376748

	Current Learning rate:  1.142857142857143e-06



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.63s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.61s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.60s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.61s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.62s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.60s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:41<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.62s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6231110138552529

	Validation acc: 0.6850198412698413

	Validation prec: 0.8701945292300293

	Validation rec: 0.6850198412698413

	Validation f1: 0.7415548670053299
loss: 


  1%|          | 1/83 [00:21<29:02, 21.25s/it][A

tensor(0.6134, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:34, 21.17s/it][A

tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:03<28:06, 21.08s/it][A

tensor(0.6124, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:44, 21.07s/it][A

tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:19, 21.01s/it][A

tensor(0.6247, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:06<26:57, 21.01s/it][A

tensor(0.6835, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:27<26:36, 21.01s/it][A

tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:48<26:08, 20.92s/it][A

tensor(0.5744, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:47, 20.91s/it][A

tensor(0.6324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:24, 20.88s/it][A

tensor(0.6584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:00, 20.84s/it][A

tensor(0.6198, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:39, 20.83s/it][A

tensor(0.7169, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:19, 20.85s/it][A

tensor(0.6644, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:52<23:57, 20.83s/it][A

tensor(0.6548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:13<23:38, 20.86s/it][A

tensor(0.6661, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:34<23:16, 20.85s/it][A

tensor(0.6473, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<22:56, 20.86s/it][A

tensor(0.6561, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:37, 20.88s/it][A

tensor(0.6817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:15, 20.87s/it][A

tensor(0.6233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<21:53, 20.85s/it][A

tensor(0.6391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:19<21:32, 20.85s/it][A

tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:39<21:10, 20.83s/it][A

tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:00<20:49, 20.82s/it][A

tensor(0.6326, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:21<20:28, 20.82s/it][A

tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:42<20:08, 20.83s/it][A

tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:03<19:48, 20.86s/it][A

tensor(0.6153, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:24<19:26, 20.84s/it][A

tensor(0.6539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:44<19:05, 20.82s/it][A

tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:05<18:43, 20.81s/it][A

tensor(0.5955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:26<18:25, 20.86s/it][A

tensor(0.6718, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:47<18:06, 20.89s/it][A

tensor(0.6556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:08<17:45, 20.89s/it][A

tensor(0.6259, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:29<17:25, 20.91s/it][A

tensor(0.6781, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:50<17:03, 20.90s/it][A

tensor(0.5864, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:11<16:42, 20.88s/it][A

tensor(0.6370, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:31<16:21, 20.88s/it][A

tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:52<16:00, 20.87s/it][A

tensor(0.6650, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:13<15:38, 20.87s/it][A

tensor(0.6141, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:34<15:18, 20.87s/it][A

tensor(0.6562, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:55<14:57, 20.87s/it][A

tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:16<14:37, 20.88s/it][A

tensor(0.6407, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:37<14:17, 20.91s/it][A

tensor(0.6424, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:58<13:57, 20.93s/it][A

tensor(0.6618, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:19<13:36, 20.94s/it][A

tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:40<13:15, 20.93s/it][A

tensor(0.6546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:01<12:55, 20.95s/it][A

tensor(0.5977, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:22<12:34, 20.95s/it][A

tensor(0.6606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:43<12:13, 20.96s/it][A

tensor(0.6842, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:04<11:53, 20.99s/it][A

tensor(0.6366, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:25<11:34, 21.04s/it][A

tensor(0.6701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:46<11:12, 21.01s/it][A

tensor(0.6663, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:07<10:49, 20.96s/it][A

loss: tensor(0.7236, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:27<10:26, 20.88s/it][A

loss: tensor(0.6783, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:48<10:06, 20.90s/it][A

tensor(0.7467, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:09<09:45, 20.89s/it][A

tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:30<09:22, 20.83s/it][A

tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:50<09:00, 20.79s/it][A

tensor(0.6723, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:11<08:39, 20.79s/it][A

tensor(0.7050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:32<08:19, 20.79s/it][A

tensor(0.6874, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:53<07:59, 20.86s/it][A

tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:14<07:40, 20.91s/it][A

tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:35<07:19, 20.93s/it][A

tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:56<06:58, 20.95s/it][A

tensor(0.6200, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:17<06:37, 20.92s/it][A

tensor(0.6761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:38<06:16, 20.91s/it][A

tensor(0.6346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:59<05:54, 20.88s/it][A

tensor(0.6487, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:19<05:33, 20.84s/it][A

tensor(0.6493, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:40<05:12, 20.84s/it][A

tensor(0.6384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:01<04:52, 20.89s/it][A

tensor(0.6110, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:22<04:31, 20.88s/it][A

tensor(0.6746, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:43<04:10, 20.88s/it][A

tensor(0.6690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:04<03:49, 20.87s/it][A

tensor(0.6199, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:25<03:28, 20.84s/it][A

tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:46<03:07, 20.87s/it][A

tensor(0.6910, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:06<02:47, 20.89s/it][A

tensor(0.7286, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:27<02:26, 20.87s/it][A

tensor(0.6570, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:48<02:05, 20.92s/it][A

tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:09<01:44, 20.92s/it][A

tensor(0.6049, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:30<01:23, 20.94s/it][A

tensor(0.5948, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:51<01:02, 20.91s/it][A

tensor(0.6339, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:12<00:41, 20.88s/it][A

tensor(0.6262, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:33<00:20, 20.89s/it][A

tensor(0.6098, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:40<00:00, 20.73s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6121, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6473468944250819

	train acc: 0.6530154709748084

	training prec: 0.8343593741266084

	training rec: 0.6530154709748084

	training f1: 0.711424205916155

	Current Learning rate:  8.571428571428572e-07



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.61s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.61s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.60s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6195623207659948

	Validation acc: 0.6830357142857143

	Validation prec: 0.8700691214617882

	Validation rec: 0.6830357142857143

	Validation f1: 0.7385648274328479
loss: 


  1%|          | 1/83 [00:20<28:31, 20.87s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:14, 20.93s/it][A

tensor(0.5784, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:54, 20.94s/it][A

tensor(0.6745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:32, 20.92s/it][A

tensor(0.6577, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:07, 20.86s/it][A

tensor(0.6405, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:46, 20.86s/it][A

tensor(0.6388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:25, 20.86s/it][A

tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:04, 20.86s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:57, 21.05s/it][A

tensor(0.7121, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:32, 20.99s/it][A

tensor(0.6545, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:08, 20.95s/it][A

tensor(0.6322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<25:01, 21.15s/it][A

tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:36, 21.09s/it][A

tensor(0.6289, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:10, 21.02s/it][A

tensor(0.6138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:45, 20.97s/it][A

tensor(0.6783, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:24, 20.97s/it][A

tensor(0.6219, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:00, 20.92s/it][A

tensor(0.7087, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:42, 20.97s/it][A

tensor(0.6722, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:25, 21.03s/it][A

tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<22:03, 21.01s/it][A

tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:45, 21.05s/it][A

tensor(0.6736, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:21, 21.00s/it][A

tensor(0.5865, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<21:00, 21.00s/it][A

tensor(0.6303, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:23<20:36, 20.97s/it][A

tensor(0.6920, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:44<20:15, 20.96s/it][A

tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:05<19:53, 20.94s/it][A

tensor(0.6321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:26<19:34, 20.97s/it][A

tensor(0.5773, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:47<19:15, 21.01s/it][A

tensor(0.6682, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:08<18:53, 20.98s/it][A

tensor(0.6152, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:29<18:33, 21.01s/it][A

tensor(0.6531, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:50<18:12, 21.00s/it][A

tensor(0.6286, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:11<17:50, 20.99s/it][A

tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:32<17:28, 20.97s/it][A

tensor(0.6236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:53<17:08, 20.99s/it][A

tensor(0.6722, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:48, 21.00s/it][A

tensor(0.6200, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:35<16:25, 20.96s/it][A

tensor(0.6566, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:55<16:02, 20.93s/it][A

tensor(0.6363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:16<15:42, 20.94s/it][A

tensor(0.6302, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:37<15:21, 20.94s/it][A

tensor(0.6191, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:58<15:00, 20.94s/it][A

tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:19<14:40, 20.98s/it][A

tensor(0.6676, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:40<14:19, 20.95s/it][A

tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:01<13:58, 20.96s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:22<13:36, 20.93s/it][A

tensor(0.6392, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:43<13:14, 20.92s/it][A

tensor(0.6746, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:04<12:53, 20.90s/it][A

tensor(0.6753, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:25<12:31, 20.88s/it][A

tensor(0.6808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:46<12:10, 20.87s/it][A

tensor(0.6972, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:06<11:49, 20.86s/it][A

tensor(0.6271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:27<11:28, 20.87s/it][A

tensor(0.6493, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:48<11:08, 20.88s/it][A

tensor(0.6743, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:09<10:46, 20.85s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:30<10:25, 20.86s/it][A

loss: tensor(0.6291, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6443, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:51<10:05, 20.87s/it][A
 66%|██████▋   | 55/83 [19:12<09:43, 20.84s/it][A

loss: tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:32<09:22, 20.85s/it][A

tensor(0.6233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:53<09:01, 20.83s/it][A

tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:14<08:41, 20.86s/it][A

tensor(0.6346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:36<08:30, 21.26s/it][A

tensor(0.6433, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:58<08:10, 21.34s/it][A

tensor(0.7045, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:19<07:49, 21.32s/it][A

tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:42<07:34, 21.66s/it][A

tensor(0.6492, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:04<07:17, 21.86s/it][A

tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:26<06:54, 21.84s/it][A

tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:47<06:31, 21.77s/it][A

tensor(0.7568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:10<06:13, 21.95s/it][A

tensor(0.6209, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:31<05:46, 21.68s/it][A

tensor(0.6127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:51<05:21, 21.40s/it][A

tensor(0.6790, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:12<04:56, 21.20s/it][A

tensor(0.6317, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:33<04:34, 21.11s/it][A

tensor(0.5926, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:54<04:12, 21.06s/it][A

tensor(0.6397, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:15<03:51, 21.01s/it][A

tensor(0.6974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:36<03:29, 20.91s/it][A

tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:56<03:07, 20.88s/it][A

tensor(0.6818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:17<02:47, 20.91s/it][A

tensor(0.6140, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:38<02:26, 20.93s/it][A

tensor(0.6678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:59<02:05, 20.95s/it][A

tensor(0.6759, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:20<01:44, 20.95s/it][A

tensor(0.6069, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:41<01:23, 20.95s/it][A

tensor(0.7324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:02<01:02, 20.98s/it][A

tensor(0.5794, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:23<00:41, 20.99s/it][A

tensor(0.6671, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:44<00:20, 20.98s/it][A

tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:51<00:00, 20.87s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6862, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6476467359496887

	train acc: 0.6496098028477546

	training prec: 0.8390060839837024

	training rec: 0.6496098028477546

	training f1: 0.709914617740682

	Current Learning rate:  5.714285714285715e-07



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.62s/it][A
  7%|▋         | 3/42 [00:07<01:41,  2.60s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.62s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.61s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.59s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.61s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.63s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:36<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.6208241440000988

	Validation acc: 0.6808035714285714

	Validation prec: 0.8701094240907636

	Validation rec: 0.6808035714285714

	Validation f1: 0.7364268461984755
loss: 


  1%|          | 1/83 [00:20<28:34, 20.91s/it][A

tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:11, 20.88s/it][A

tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:56, 20.95s/it][A

tensor(0.6876, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:34, 20.94s/it][A

tensor(0.6314, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:16, 20.98s/it][A

tensor(0.5588, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:57, 21.01s/it][A

tensor(0.6479, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:34, 20.98s/it][A

tensor(0.6174, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:14, 20.99s/it][A

tensor(0.6042, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:50, 20.95s/it][A

tensor(0.6985, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:27, 20.92s/it][A

tensor(0.6345, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:06, 20.92s/it][A

tensor(0.6022, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:46, 20.93s/it][A

tensor(0.6381, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:26, 20.95s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:03, 20.92s/it][A

tensor(0.5984, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:44, 20.94s/it][A

tensor(0.6155, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:21, 20.92s/it][A

tensor(0.6450, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:55<22:59, 20.90s/it][A

tensor(0.6174, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:16<22:40, 20.93s/it][A

tensor(0.6617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:37<22:19, 20.92s/it][A

tensor(0.6284, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:58<21:57, 20.91s/it][A

tensor(0.6201, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:19<21:35, 20.90s/it][A

tensor(0.7026, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:40<21:14, 20.90s/it][A

tensor(0.6416, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:01<20:58, 20.97s/it][A

tensor(0.6684, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:39, 21.01s/it][A

tensor(0.6894, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:43<20:19, 21.03s/it][A

tensor(0.6128, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<19:58, 21.03s/it][A

tensor(0.6843, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:39, 21.05s/it][A

tensor(0.7023, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:47<19:21, 21.11s/it][A

tensor(0.6198, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:08<18:59, 21.10s/it][A

tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:29<18:37, 21.08s/it][A

tensor(0.6048, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:50<18:16, 21.08s/it][A

tensor(0.6711, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:11<17:55, 21.10s/it][A

tensor(0.6463, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:32<17:34, 21.10s/it][A

tensor(0.6735, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:53<17:13, 21.09s/it][A

tensor(0.6862, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:14<16:50, 21.05s/it][A

tensor(0.6757, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:35<16:30, 21.07s/it][A

tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:56<16:07, 21.03s/it][A

tensor(0.6579, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:17<15:48, 21.07s/it][A

tensor(0.6487, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:38<15:25, 21.03s/it][A

tensor(0.6459, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:59<15:03, 21.02s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:20<14:43, 21.04s/it][A

tensor(0.6031, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:41<14:20, 21.00s/it][A

tensor(0.6779, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:02<13:59, 20.98s/it][A

tensor(0.6033, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:23<13:41, 21.06s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:44<13:18, 21.01s/it][A

tensor(0.6621, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:05<12:56, 20.98s/it][A

tensor(0.6743, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:26<12:34, 20.97s/it][A

tensor(0.7163, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:47<12:12, 20.93s/it][A

tensor(0.6621, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:08<11:49, 20.86s/it][A

tensor(0.6419, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:29<11:27, 20.84s/it][A

tensor(0.6519, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:49<11:05, 20.81s/it][A

loss: tensor(0.6740, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:10<10:47, 20.87s/it][A

tensor(0.6370, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:31<10:28, 20.94s/it][A

tensor(0.6363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:52<10:07, 20.95s/it][A

tensor(0.6208, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:13<09:46, 20.94s/it][A

tensor(0.5852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:34<09:25, 20.94s/it][A

tensor(0.6896, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:55<09:05, 20.97s/it][A

tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:16<08:43, 20.93s/it][A

tensor(0.6729, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:37<08:23, 20.96s/it][A

tensor(0.6304, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:58<08:01, 20.95s/it][A

tensor(0.6835, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:19<07:40, 20.92s/it][A

tensor(0.6200, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:40<07:18, 20.86s/it][A

tensor(0.6736, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:00<06:56, 20.82s/it][A

tensor(0.6844, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:21<06:36, 20.85s/it][A

tensor(0.6317, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:42<06:15, 20.85s/it][A

tensor(0.6346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:03<05:54, 20.85s/it][A

tensor(0.6164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:24<05:33, 20.86s/it][A

tensor(0.6416, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:45<05:12, 20.85s/it][A

tensor(0.6337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:06<04:51, 20.86s/it][A

tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:27<04:31, 20.88s/it][A

tensor(0.6910, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:48<04:11, 20.92s/it][A

tensor(0.5965, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:09<03:50, 20.96s/it][A

tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:30<03:29, 20.98s/it][A

tensor(0.6039, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:50<03:08, 20.95s/it][A

tensor(0.6139, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:12<02:47, 20.98s/it][A

tensor(0.6537, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:33<02:27, 21.02s/it][A

tensor(0.7002, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:54<02:06, 21.00s/it][A

tensor(0.6237, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:15<01:45, 21.03s/it][A

tensor(0.6156, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:36<01:24, 21.05s/it][A

tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:57<01:03, 21.01s/it][A

tensor(0.6601, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:18<00:42, 21.01s/it][A

tensor(0.6947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:39<00:21, 21.02s/it][A

tensor(0.6561, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:46<00:00, 20.80s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6454701344650912

	train acc: 0.650157447973713

	training prec: 0.8385407717082606

	training rec: 0.650157447973713

	training f1: 0.7103852076544891

	Current Learning rate:  2.8571428571428575e-07



  2%|▏         | 1/42 [00:02<01:45,  2.56s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.61s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.62s/it][A
 10%|▉         | 4/42 [00:10<01:38,  2.60s/it][A
 12%|█▏        | 5/42 [00:13<01:36,  2.61s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:30,  2.58s/it][A
 19%|█▉        | 8/42 [00:20<01:28,  2.59s/it][A
 21%|██▏       | 9/42 [00:23<01:25,  2.60s/it][A
 24%|██▍       | 10/42 [00:25<01:23,  2.60s/it][A
 26%|██▌       | 11/42 [00:28<01:20,  2.60s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.62s/it][A
 31%|███       | 13/42 [00:33<01:15,  2.60s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.60s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:46<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:49<00:59,  2.60s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.622743607986541

	Validation acc: 0.6728670634920635

	Validation prec: 0.8660456334410578

	Validation rec: 0.6728670634920635

	Validation f1: 0.7303481668288089
loss: 


  1%|          | 1/83 [00:20<28:34, 20.91s/it][A

tensor(0.6801, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:18, 20.97s/it][A

tensor(0.6869, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:59, 20.99s/it][A

tensor(0.5948, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:34, 20.94s/it][A

tensor(0.5667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:44<27:11, 20.92s/it][A

tensor(0.6610, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:49, 20.90s/it][A

tensor(0.6548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:27, 20.89s/it][A

tensor(0.6610, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:47<26:06, 20.89s/it][A

tensor(0.7212, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:48, 20.93s/it][A

tensor(0.6648, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:27, 20.93s/it][A

tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:50<25:09, 20.96s/it][A

tensor(0.6792, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:11<24:46, 20.93s/it][A

tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:32<24:25, 20.94s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:53<24:06, 20.97s/it][A

tensor(0.6288, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:14<23:49, 21.02s/it][A

tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:35<23:25, 20.98s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:56<23:07, 21.02s/it][A

tensor(0.6102, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:17<22:43, 20.98s/it][A

tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:38<22:22, 20.98s/it][A

tensor(0.6256, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:59<22:00, 20.96s/it][A

tensor(0.5962, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:41, 20.99s/it][A

tensor(0.5958, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:41<21:20, 20.99s/it][A

tensor(0.6067, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [08:02<20:59, 20.99s/it][A

tensor(0.6438, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:23<20:37, 20.98s/it][A

tensor(0.6987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:44<20:17, 20.99s/it][A

tensor(0.5831, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:04<19:55, 20.97s/it][A

tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:25<19:32, 20.94s/it][A

tensor(0.7035, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:46<19:10, 20.92s/it][A

tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:07<18:50, 20.93s/it][A

tensor(0.6350, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:28<18:33, 21.00s/it][A

tensor(0.6234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:49<18:13, 21.04s/it][A

tensor(0.6261, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:10<17:48, 20.96s/it][A

loss: tensor(0.6433, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:31<17:24, 20.89s/it][A

loss: tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:52<17:01, 20.85s/it][A

loss: tensor(0.6407, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:13<16:40, 20.85s/it][A

loss: tensor(0.5988, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:33<16:14, 20.74s/it][A

loss: tensor(0.7504, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:54<15:51, 20.67s/it][A

loss: tensor(0.6453, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:14<15:30, 20.67s/it][A

loss: tensor(0.6763, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:35<15:12, 20.74s/it][A

loss: tensor(0.6280, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:56<14:50, 20.70s/it][A

loss: tensor(0.6065, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:16<14:28, 20.68s/it][A

loss: tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:37<14:05, 20.63s/it][A

loss: tensor(0.6087, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:57<13:43, 20.60s/it][A

loss: tensor(0.6244, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:18<13:25, 20.65s/it][A

loss: tensor(0.6460, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:39<13:08, 20.76s/it][A

tensor(0.6133, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:00<12:53, 20.90s/it][A

tensor(0.5997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:21<12:33, 20.93s/it][A

tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:42<12:13, 20.95s/it][A

tensor(0.5957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:03<11:52, 20.96s/it][A

tensor(0.6313, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:24<11:30, 20.93s/it][A

loss: tensor(0.6593, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:45<11:08, 20.91s/it][A

loss: tensor(0.6133, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:06<10:49, 20.94s/it][A

tensor(0.6890, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:27<10:28, 20.94s/it][A

loss: tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:48<10:08, 20.98s/it][A

tensor(0.5967, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:09<09:46, 20.95s/it][A

tensor(0.7094, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:30<09:24, 20.91s/it][A

tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:51<09:02, 20.87s/it][A

tensor(0.5831, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:11<08:41, 20.84s/it][A

tensor(0.5941, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:32<08:20, 20.84s/it][A

tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:53<07:59, 20.84s/it][A

tensor(0.7029, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:14<07:38, 20.85s/it][A

tensor(0.6725, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:35<07:17, 20.85s/it][A

tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:56<06:56, 20.84s/it][A

tensor(0.6445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:16<06:32, 20.64s/it][A

tensor(0.6426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:36<06:08, 20.49s/it][A

tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:56<05:46, 20.39s/it][A

tensor(0.6285, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:16<05:25, 20.32s/it][A

tensor(0.6474, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:37<05:06, 20.43s/it][A

tensor(0.6297, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:58<04:46, 20.50s/it][A

tensor(0.6543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:18<04:27, 20.59s/it][A

tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:39<04:07, 20.63s/it][A

tensor(0.6617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:00<03:47, 20.64s/it][A

tensor(0.6199, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:21<03:26, 20.67s/it][A

tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:41<03:06, 20.73s/it][A

tensor(0.6934, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:02<02:45, 20.75s/it][A

tensor(0.6753, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:23<02:25, 20.72s/it][A

tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:43<02:03, 20.52s/it][A

tensor(0.6315, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:03<01:41, 20.36s/it][A

tensor(0.6036, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:23<01:21, 20.26s/it][A

tensor(0.6448, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:43<01:00, 20.16s/it][A

tensor(0.6295, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:03<00:40, 20.10s/it][A

tensor(0.5909, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:23<00:20, 20.05s/it][A

tensor(0.6773, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:30<00:00, 20.60s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6628, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.641658371471497

	train acc: 0.644321604600219

	training prec: 0.8402539781185492

	training rec: 0.644321604600219

	training f1: 0.7049410250729161

	Current Learning rate:  0.0



  2%|▏         | 1/42 [00:02<01:44,  2.54s/it][A
  5%|▍         | 2/42 [00:05<01:39,  2.50s/it][A
  7%|▋         | 3/42 [00:07<01:37,  2.50s/it][A
 10%|▉         | 4/42 [00:10<01:35,  2.51s/it][A
 12%|█▏        | 5/42 [00:12<01:32,  2.49s/it][A
 14%|█▍        | 6/42 [00:15<01:29,  2.50s/it][A
 17%|█▋        | 7/42 [00:17<01:27,  2.50s/it][A
 19%|█▉        | 8/42 [00:19<01:24,  2.49s/it][A
 21%|██▏       | 9/42 [00:22<01:22,  2.50s/it][A
 24%|██▍       | 10/42 [00:24<01:19,  2.50s/it][A
 26%|██▌       | 11/42 [00:27<01:17,  2.49s/it][A
 29%|██▊       | 12/42 [00:29<01:14,  2.50s/it][A
 31%|███       | 13/42 [00:32<01:12,  2.51s/it][A
 33%|███▎      | 14/42 [00:34<01:09,  2.49s/it][A
 36%|███▌      | 15/42 [00:37<01:07,  2.50s/it][A
 38%|███▊      | 16/42 [00:39<01:05,  2.50s/it][A
 40%|████      | 17/42 [00:42<01:02,  2.49s/it][A
 43%|████▎     | 18/42 [00:45<01:00,  2.52s/it][A
 45%|████▌     | 19/42 [00:47<00:57,  2.52s/it][A
 48%|████▊     | 20/42 [00:49<00:54,  2


	Validation loss: 0.6245249878792536

	Validation acc: 0.6703869047619048

	Validation prec: 0.8619947952472977

	Validation rec: 0.6703869047619048

	Validation f1: 0.729068359400693





In [21]:
# train_loss.numpy()
# train_loss


In [22]:
# # Saving models
# torch.save(model.state_dict(), "finetuned.pth")

# #load models
# model = Neural

In [23]:
## saving the model 
torch.save(model.state_dict(), "finetuned-35-epochs-1e5-lr-with-weighted-loss.pth")

In [20]:
# loading the locally saved model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


model = CausalityBERT()
model.load_state_dict(torch.load("finetuned-35-epochs-1e5-lr-with-weighted-loss.pth"))
## Move the model to the GPU 
model.to(device)
model.eval() # gettign in the eval mode 



You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.3.output.dense.bias', 'roberta.encoder.layer.5.attention.self.query.weight', 'roberta.encoder.layer.4.attention.self.key.weight', 'roberta.encoder.layer.8.intermediate.dense.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.dense.bias', 'roberta.encoder.layer.5.attention.output.dense.weight', 'roberta.encoder.layer.2.attention.self.key.weight', 'roberta.encoder.layer.2.attention.output.dense.bias', 'roberta.encoder.layer.9.attention.output.LayerNorm.bias', 'roberta.encoder.layer.6.attention.output.dense.weight', 'roberta.encoder.layer.2.attention.self.query.weight', 'roberta.encoder.layer.1.intermediate.dense.weight', 'roberta.encoder.layer.4.output.dense.weight'

CausalityBERT(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(64001, 768, padding_idx=1)
      (position_embeddings): Embedding(130, 768)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True

## evaluation on the test dataset 



In [None]:
## evaluation on the test dataset 

# test_accuracy, test_mcc_accuracy, nb_test_steps = 0, 0, 0 
# test_mcc_accuracy, nb_test_steps = 0, 0,

# test_accuracy = []
# test_loss = []
# test_acc = []
# test_prec = []
# test_rec = []
# test_f1 = []

# for batch in tqdm(test_loader):
#     batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
#     b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
#     with torch.no_grad():
#         model.eval()
#         logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
    
    
#     loss = loss_fn(logits, b_labels)
#     test_loss.append(loss.item())

#     # move logits and labels to CPU
#     logits = logits.detach().to('cpu').numpy()
#     label_ids = b_labels.to('cpu').numpy()

#     pred_flat = np.argmax(logits, axis=1).flatten()
#     labels_flat = label_ids.flatten()

# #     eval_accuracy += accuracy_score(labels_flat, pred_flat)
# #     test_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  


#     metrics = compute_metrics(pred_flat, labels_flat)
#     test_acc.append(metrics["accuracy"])
#     test_prec.append(metrics["precision"])
#     test_rec.append(metrics["recall"])
#     test_f1.append(metrics["f1"])
# #     nb_test_steps += 1
    

# # print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
# # print(F'\n\ttest MCC Accuracy: {test_mcc_accuracy / nb_test_steps}') # eval M
# print(F'\n\ttest loss: {np.mean(test_loss)}')
# print(F'\n\ttest acc: {np.mean(test_acc)}')
# print(F'\n\ttest prec: {np.mean(test_prec)}')
# print(F'\n\ttest rec: {np.mean(test_rec)}')
# print(F'\n\ttest f1: {np.mean(test_f1)}')

In [21]:
loss_fn = CrossEntropyLoss()
test_loss = []
test_acc = []
test_prec = []
test_rec = []
test_f1 = []


for batch in tqdm(test_loader):
    batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
    b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader

    with torch.no_grad():
        model.eval()
        logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
    
    print("logits:")
    print(logits)
    # move logits and labels to CPU
    logits = logits.detach().to('cpu').numpy()
    label_ids = b_labels.to('cpu').numpy()

    pred_flat = np.argmax(logits, axis=1).flatten()
    labels_flat = label_ids.flatten()
    print("pred:", pred_flat)
    print("real:", labels_flat)
    
    metrics = compute_metrics(pred_flat, labels_flat)
    test_acc.append(metrics["accuracy"])
    test_prec.append(metrics["precision"])
    test_rec.append(metrics["recall"])
    test_f1.append(metrics["f1"])

    
print(F'\n\ttest loss: {np.mean(test_loss)}')
print(F'\n\ttest acc: {np.mean(test_acc)}')
print(F'\n\ttest prec: {np.mean(test_prec)}')
print(F'\n\ttest rec: {np.mean(test_rec)}')
print(F'\n\ttest f1: {np.mean(test_f1)}')





  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
  2%|▏         | 1/52 [00:03<03:21,  3.95s/it]

logits:
tensor([[ 0.4256, -0.3423],
        [ 0.1310, -0.0738],
        [ 0.2632, -0.1696],
        [ 0.2971, -0.2083],
        [-0.0373,  0.0746],
        [ 0.1812, -0.1131],
        [ 0.4347, -0.3022],
        [ 0.1074, -0.0754],
        [ 0.5310, -0.4874],
        [ 0.0371,  0.0236],
        [-0.0361,  0.0617],
        [-0.1453,  0.1525],
        [ 0.1017, -0.0571],
        [ 0.5132, -0.4816],
        [ 0.2678, -0.2946],
        [ 0.1956, -0.1091],
        [ 0.1624, -0.0845],
        [ 0.0591,  0.0203],
        [ 0.4443, -0.3717],
        [-0.1787,  0.1903],
        [ 0.2134, -0.1454],
        [ 0.0061,  0.0445],
        [ 0.4392, -0.4210],
        [ 0.0307,  0.0116],
        [-0.0357,  0.0487],
        [ 0.4546, -0.3905],
        [ 0.4220, -0.3644],
        [ 0.1868, -0.1318],
        [ 0.3048, -0.1954],
        [-0.1289,  0.1426],
        [ 0.1381, -0.0573],
        [ 0.2063, -0.1357]], device='cuda:0')
pred: [0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0]
real: 

  4%|▍         | 2/52 [00:07<03:01,  3.62s/it]

logits:
tensor([[-0.0382,  0.0713],
        [-0.2000,  0.2110],
        [ 0.1959, -0.1433],
        [-0.0892,  0.1253],
        [-0.1888,  0.2031],
        [-0.0695,  0.0859],
        [ 0.1200, -0.0728],
        [-0.0549,  0.1213],
        [ 0.2026, -0.1208],
        [-0.1050,  0.1409],
        [ 0.0675, -0.0675],
        [-0.1248,  0.1231],
        [ 0.4976, -0.4370],
        [ 0.0528,  0.0122],
        [-0.0904,  0.0998],
        [ 0.1958, -0.1208],
        [ 0.0117, -0.0025],
        [-0.0900,  0.1043],
        [-0.0763,  0.0159],
        [ 0.3325, -0.2349],
        [ 0.2765, -0.2186],
        [-0.1109,  0.1443],
        [ 0.4811, -0.3659],
        [-0.1753,  0.2032],
        [-0.0960,  0.1576],
        [ 0.0704, -0.0578],
        [-0.1651,  0.1625],
        [ 0.4638, -0.3726],
        [ 0.1662, -0.1070],
        [-0.0544,  0.0524],
        [-0.1233,  0.1157],
        [-0.2027,  0.2232]], device='cuda:0')
pred: [1 1 0 1 1 1 0 1 0 1 0 1 0 0 1 0 0 1 1 0 0 1 0 1 1 0 1 0 0 1 1 1]
real: 

  6%|▌         | 3/52 [00:10<02:49,  3.47s/it]

logits:
tensor([[-0.1304,  0.1365],
        [-0.2265,  0.2075],
        [-0.1017,  0.1552],
        [ 0.2118, -0.1058],
        [ 0.2788, -0.2144],
        [ 0.4271, -0.3066],
        [ 0.1060, -0.0603],
        [ 0.2798, -0.2528],
        [ 0.1688, -0.0769],
        [ 0.1467, -0.0584],
        [-0.1704,  0.2037],
        [-0.1352,  0.1680],
        [-0.0860,  0.1266],
        [-0.1905,  0.1951],
        [ 0.4187, -0.3469],
        [-0.2000,  0.2248],
        [-0.0735,  0.1277],
        [ 0.4561, -0.4010],
        [ 0.1934, -0.1667],
        [ 0.0013,  0.0068],
        [-0.1684,  0.2057],
        [-0.0570,  0.1050],
        [ 0.3471, -0.2617],
        [ 0.0385, -0.0113],
        [ 0.4762, -0.4076],
        [-0.0473,  0.1168],
        [-0.0137,  0.0631],
        [ 0.0095,  0.0362],
        [ 0.2709, -0.1876],
        [-0.1097,  0.1490],
        [ 0.1136, -0.0865],
        [-0.1860,  0.2251]], device='cuda:0')
pred: [1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 1 1 0 1 0 1]
real: 

  8%|▊         | 4/52 [00:13<02:44,  3.42s/it]

logits:
tensor([[ 0.3612, -0.2511],
        [ 0.0366,  0.0176],
        [ 0.2590, -0.2148],
        [-0.0638,  0.0508],
        [-0.0072,  0.0273],
        [-0.1546,  0.1996],
        [ 0.2069, -0.1367],
        [ 0.0936, -0.0373],
        [ 0.0980, -0.0795],
        [-0.1634,  0.1998],
        [ 0.1406, -0.0590],
        [-0.0159,  0.0529],
        [ 0.1050, -0.1092],
        [ 0.4095, -0.3509],
        [ 0.3772, -0.3368],
        [-0.1112,  0.1416],
        [-0.1871,  0.2057],
        [ 0.1372, -0.1135],
        [ 0.4170, -0.3741],
        [ 0.0109,  0.0233],
        [-0.0755,  0.1206],
        [ 0.4078, -0.3506],
        [-0.1199,  0.1044],
        [ 0.1134, -0.0236],
        [ 0.3519, -0.2785],
        [-0.1649,  0.1904],
        [ 0.3786, -0.3318],
        [ 0.2347, -0.1859],
        [-0.1094,  0.1392],
        [ 0.4912, -0.4093],
        [ 0.3105, -0.1832],
        [ 0.0216,  0.0073]], device='cuda:0')
pred: [0 0 0 1 1 1 0 0 0 1 0 1 0 0 0 1 1 0 0 1 1 0 1 0 0 1 0 0 1 0 0 0]
real: 

 10%|▉         | 5/52 [00:17<02:38,  3.38s/it]

logits:
tensor([[ 0.4581, -0.3378],
        [ 0.4333, -0.3591],
        [-0.0313,  0.0278],
        [ 0.2171, -0.1501],
        [ 0.4836, -0.4206],
        [ 0.4258, -0.3381],
        [ 0.1043, -0.0434],
        [-0.0905,  0.1238],
        [ 0.2280, -0.1721],
        [ 0.0110,  0.0110],
        [ 0.0321,  0.0292],
        [ 0.3020, -0.2285],
        [ 0.3715, -0.3368],
        [ 0.1850, -0.1007],
        [ 0.2980, -0.2423],
        [ 0.4492, -0.4056],
        [-0.0992,  0.1341],
        [ 0.3491, -0.2581],
        [-0.0048,  0.0538],
        [ 0.4090, -0.3028],
        [ 0.4294, -0.2907],
        [ 0.0867, -0.0527],
        [-0.0436,  0.1044],
        [-0.0165,  0.0904],
        [ 0.4315, -0.3721],
        [-0.1151,  0.1484],
        [ 0.0343, -0.0326],
        [ 0.1083, -0.0253],
        [-0.1399,  0.1694],
        [ 0.1276, -0.0308],
        [-0.0012,  0.0534],
        [ 0.1748, -0.1377]], device='cuda:0')
pred: [0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0 0 1 1 0 1 0 0 1 0 1 0]
real: 

 12%|█▏        | 6/52 [00:20<02:33,  3.35s/it]

logits:
tensor([[ 0.4460, -0.4094],
        [ 0.1605, -0.0978],
        [-0.1819,  0.2085],
        [-0.1970,  0.2201],
        [ 0.1569, -0.1274],
        [-0.0099,  0.0505],
        [ 0.0722, -0.0466],
        [ 0.1944, -0.1003],
        [ 0.3093, -0.1697],
        [ 0.4428, -0.3949],
        [-0.0968,  0.1153],
        [-0.2001,  0.2277],
        [ 0.1312, -0.0884],
        [ 0.2033, -0.1321],
        [ 0.0415,  0.0185],
        [ 0.2194, -0.1499],
        [-0.1114,  0.1395],
        [ 0.3831, -0.3303],
        [ 0.4721, -0.4225],
        [ 0.0067,  0.0228],
        [-0.1338,  0.1465],
        [ 0.2848, -0.1703],
        [ 0.4187, -0.3878],
        [-0.0754,  0.0840],
        [ 0.5505, -0.5080],
        [ 0.5129, -0.4186],
        [-0.1552,  0.1959],
        [-0.1716,  0.1898],
        [-0.1196,  0.1330],
        [ 0.1102, -0.0520],
        [-0.2291,  0.2364],
        [ 0.1564, -0.0698]], device='cuda:0')
pred: [0 0 1 1 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 1 1 0 0 1 0 0 1 1 1 0 1 0]
real: 

 13%|█▎        | 7/52 [00:23<02:29,  3.32s/it]

logits:
tensor([[-0.1396,  0.1803],
        [-0.1411,  0.1490],
        [ 0.2515, -0.1654],
        [ 0.1423, -0.0773],
        [-0.0638,  0.1221],
        [ 0.5116, -0.4327],
        [-0.0617,  0.1016],
        [ 0.3021, -0.2713],
        [ 0.3923, -0.2449],
        [-0.1175,  0.1504],
        [ 0.0368, -0.0087],
        [ 0.4171, -0.3184],
        [ 0.0944, -0.0686],
        [ 0.0899, -0.0690],
        [-0.0824,  0.1241],
        [-0.1357,  0.1280],
        [ 0.4359, -0.3682],
        [-0.1239,  0.1079],
        [ 0.2106, -0.1599],
        [ 0.1011, -0.0663],
        [ 0.1163, -0.0637],
        [ 0.4657, -0.3679],
        [ 0.0059,  0.0222],
        [ 0.4765, -0.4025],
        [ 0.0364, -0.0086],
        [ 0.0100,  0.0672],
        [ 0.1221, -0.0459],
        [ 0.3938, -0.3117],
        [-0.0231,  0.0429],
        [ 0.4250, -0.3760],
        [-0.0015,  0.0749],
        [ 0.3218, -0.1676]], device='cuda:0')
pred: [1 1 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 1 0 0 1 0 0 1 0 1 0]
real: 

 15%|█▌        | 8/52 [00:27<02:25,  3.30s/it]

logits:
tensor([[ 0.0086,  0.0159],
        [ 0.0758,  0.0362],
        [ 0.4756, -0.4226],
        [ 0.1059, -0.0288],
        [-0.1521,  0.1863],
        [-0.0658,  0.0798],
        [ 0.0024,  0.0453],
        [ 0.0334,  0.0143],
        [ 0.2627, -0.1899],
        [ 0.1652, -0.1091],
        [ 0.1240, -0.0455],
        [ 0.2129, -0.1451],
        [ 0.0442, -0.0107],
        [-0.1460,  0.1837],
        [ 0.1545, -0.1100],
        [ 0.0165,  0.0477],
        [ 0.3511, -0.2476],
        [ 0.3585, -0.2532],
        [ 0.0300,  0.0222],
        [-0.0523,  0.0876],
        [-0.1165,  0.1131],
        [-0.1775,  0.2060],
        [-0.0910,  0.0958],
        [ 0.3647, -0.2724],
        [ 0.0701, -0.0228],
        [ 0.2664, -0.1614],
        [ 0.1983, -0.1508],
        [ 0.2360, -0.2235],
        [-0.1225,  0.1222],
        [-0.1547,  0.1779],
        [-0.1756,  0.1709],
        [ 0.2320, -0.1537]], device='cuda:0')
pred: [1 0 0 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 0 1 1 1 1 0 0 0 0 0 1 1 1 0]
real: 

 17%|█▋        | 9/52 [00:30<02:24,  3.36s/it]

logits:
tensor([[-0.0973,  0.1324],
        [ 0.1666, -0.1186],
        [ 0.1852, -0.1449],
        [-0.0912,  0.1125],
        [-0.1075,  0.1600],
        [ 0.2063, -0.0983],
        [ 0.1431, -0.1230],
        [-0.0631,  0.0886],
        [ 0.1743, -0.1058],
        [ 0.0162,  0.0134],
        [ 0.2788, -0.2504],
        [ 0.1155, -0.0382],
        [ 0.2326, -0.1400],
        [ 0.0752, -0.0123],
        [ 0.1902, -0.1297],
        [ 0.4048, -0.2704],
        [ 0.3360, -0.2118],
        [-0.1163,  0.1282],
        [ 0.3338, -0.2397],
        [ 0.2651, -0.2186],
        [ 0.1645, -0.1242],
        [ 0.4112, -0.2974],
        [ 0.4122, -0.3559],
        [-0.1795,  0.2055],
        [-0.0118,  0.0408],
        [-0.0900,  0.1439],
        [-0.1503,  0.1864],
        [-0.1403,  0.1672],
        [ 0.1235, -0.0719],
        [ 0.4367, -0.3915],
        [ 0.1760, -0.1325],
        [ 0.4230, -0.3482]], device='cuda:0')
pred: [1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 1 0 0 0 0]
real: 

 19%|█▉        | 10/52 [00:33<02:19,  3.31s/it]

logits:
tensor([[ 0.0870,  0.0125],
        [ 0.2993, -0.1856],
        [-0.1016,  0.1393],
        [ 0.4393, -0.4002],
        [-0.0997,  0.1199],
        [ 0.4469, -0.3509],
        [ 0.2026, -0.1407],
        [ 0.0973, -0.0900],
        [-0.0634,  0.0916],
        [ 0.0305, -0.0303],
        [ 0.3749, -0.2051],
        [-0.0861,  0.1118],
        [-0.0996,  0.0949],
        [-0.0708,  0.0759],
        [ 0.3418, -0.2601],
        [ 0.4273, -0.3906],
        [-0.2178,  0.2158],
        [ 0.3254, -0.2358],
        [-0.0993,  0.1380],
        [ 0.4117, -0.3470],
        [-0.1373,  0.1668],
        [ 0.0600, -0.0007],
        [ 0.3323, -0.3149],
        [ 0.4988, -0.4657],
        [ 0.4034, -0.3470],
        [ 0.2097, -0.1186],
        [ 0.1230, -0.0708],
        [-0.1879,  0.1712],
        [ 0.3281, -0.2846],
        [-0.1644,  0.2001],
        [-0.1574,  0.1676],
        [-0.0513,  0.0814]], device='cuda:0')
pred: [0 0 1 0 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 1 1]
real: 

 21%|██        | 11/52 [00:37<02:17,  3.36s/it]

logits:
tensor([[-0.0120,  0.0606],
        [ 0.0176,  0.0333],
        [ 0.3504, -0.2530],
        [ 0.3827, -0.2659],
        [ 0.4578, -0.4250],
        [ 0.0281,  0.0151],
        [ 0.1720, -0.1193],
        [ 0.0405,  0.0251],
        [ 0.1952, -0.1187],
        [-0.1477,  0.1696],
        [ 0.2258, -0.1859],
        [ 0.2346, -0.1835],
        [ 0.1247, -0.0114],
        [-0.1257,  0.1572],
        [-0.0117,  0.0432],
        [ 0.4729, -0.4094],
        [ 0.3538, -0.2793],
        [ 0.4038, -0.3538],
        [-0.0930,  0.1709],
        [ 0.0839, -0.0296],
        [-0.0421,  0.0862],
        [ 0.3427, -0.2461],
        [ 0.1086, -0.0702],
        [ 0.0902, -0.0554],
        [ 0.2348, -0.1866],
        [ 0.3991, -0.2675],
        [ 0.2504, -0.1821],
        [-0.1108,  0.1515],
        [ 0.1467, -0.0818],
        [ 0.4117, -0.3789],
        [ 0.4785, -0.4377],
        [-0.0706,  0.0907]], device='cuda:0')
pred: [1 1 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 1]
real: 

 23%|██▎       | 12/52 [00:40<02:16,  3.41s/it]

logits:
tensor([[ 0.0864, -0.0472],
        [ 0.0504, -0.0128],
        [-0.1205,  0.1548],
        [ 0.3032, -0.1752],
        [-0.0965,  0.1412],
        [ 0.0495, -0.0037],
        [ 0.1366, -0.1201],
        [-0.1198,  0.1422],
        [ 0.0502, -0.0169],
        [ 0.3149, -0.2857],
        [ 0.3652, -0.3127],
        [ 0.4986, -0.4032],
        [ 0.2575, -0.1447],
        [ 0.1003, -0.0883],
        [ 0.0426,  0.0035],
        [-0.2345,  0.2683],
        [ 0.0182,  0.0719],
        [-0.1514,  0.1633],
        [-0.0686,  0.1091],
        [ 0.4096, -0.3086],
        [ 0.0723, -0.0588],
        [ 0.0191,  0.0472],
        [ 0.3097, -0.2178],
        [ 0.2481, -0.2245],
        [ 0.4691, -0.4229],
        [ 0.1656, -0.1184],
        [-0.1842,  0.2210],
        [ 0.0249, -0.0055],
        [-0.1900,  0.2045],
        [ 0.1692, -0.0693],
        [ 0.2306, -0.1513],
        [ 0.0558, -0.0538]], device='cuda:0')
pred: [0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 0]
real: 

 25%|██▌       | 13/52 [00:44<02:12,  3.39s/it]

logits:
tensor([[-0.0313,  0.0758],
        [ 0.0164,  0.0325],
        [ 0.4456, -0.3852],
        [ 0.2856, -0.1692],
        [-0.0625,  0.0944],
        [ 0.3632, -0.2910],
        [-0.1281,  0.1859],
        [ 0.1432, -0.0898],
        [ 0.3880, -0.2736],
        [ 0.1102, -0.0505],
        [ 0.1047, -0.1094],
        [-0.0947,  0.1576],
        [ 0.3011, -0.3148],
        [-0.0671,  0.0740],
        [-0.0089,  0.0711],
        [-0.0089,  0.0305],
        [ 0.0806,  0.0072],
        [ 0.3293, -0.2504],
        [ 0.1746, -0.1187],
        [-0.2547,  0.2481],
        [ 0.3279, -0.2903],
        [ 0.2346, -0.1369],
        [ 0.3958, -0.3046],
        [ 0.4424, -0.3369],
        [ 0.1106, -0.0421],
        [ 0.1631, -0.1150],
        [-0.1741,  0.1796],
        [ 0.3719, -0.3059],
        [-0.0684,  0.0763],
        [ 0.0954, -0.0335],
        [-0.0667,  0.0969],
        [ 0.1547, -0.0938]], device='cuda:0')
pred: [1 1 0 0 1 0 1 0 0 0 0 1 0 1 1 1 0 0 0 1 0 0 0 0 0 0 1 0 1 0 1 0]
real: 

 27%|██▋       | 14/52 [00:47<02:08,  3.39s/it]

logits:
tensor([[-0.1623,  0.1952],
        [-0.1237,  0.0939],
        [ 0.1189, -0.0832],
        [ 0.3247, -0.2053],
        [ 0.0888, -0.0194],
        [ 0.2688, -0.1336],
        [-0.0816,  0.1114],
        [ 0.1837, -0.1262],
        [ 0.1441, -0.0874],
        [ 0.2255, -0.1378],
        [ 0.2091, -0.1648],
        [-0.0787,  0.0638],
        [ 0.1425, -0.1050],
        [-0.0227,  0.0656],
        [ 0.1068, -0.0479],
        [ 0.1816, -0.1372],
        [ 0.2385, -0.1180],
        [-0.0808,  0.1065],
        [ 0.2675, -0.1758],
        [ 0.4453, -0.3635],
        [ 0.1343, -0.0870],
        [ 0.1630, -0.0722],
        [ 0.3298, -0.2709],
        [ 0.1461, -0.1113],
        [-0.0609,  0.1138],
        [ 0.3624, -0.1992],
        [-0.1687,  0.2061],
        [-0.0169,  0.0660],
        [-0.1006,  0.1035],
        [ 0.1999, -0.1189],
        [ 0.2067, -0.0851],
        [ 0.3216, -0.2288]], device='cuda:0')
pred: [1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 1 0 0 0]
real: 

 29%|██▉       | 15/52 [00:50<02:03,  3.33s/it]

logits:
tensor([[ 0.2375, -0.2100],
        [ 0.0151,  0.0347],
        [-0.2096,  0.2230],
        [ 0.1023, -0.0120],
        [-0.1269,  0.1727],
        [ 0.4683, -0.4277],
        [ 0.0991, -0.0317],
        [-0.1654,  0.2034],
        [ 0.2632, -0.1932],
        [ 0.1264, -0.1202],
        [-0.1840,  0.1826],
        [ 0.2284, -0.1400],
        [ 0.2277, -0.2057],
        [ 0.0539, -0.0380],
        [ 0.2562, -0.1717],
        [-0.0802,  0.0906],
        [ 0.0062,  0.0451],
        [ 0.1223, -0.1334],
        [-0.0804,  0.1251],
        [ 0.3633, -0.2172],
        [ 0.0420,  0.0327],
        [ 0.3785, -0.2991],
        [-0.0602,  0.1153],
        [ 0.1910, -0.1318],
        [ 0.1427, -0.1246],
        [ 0.0250,  0.0562],
        [ 0.4730, -0.4450],
        [-0.1222,  0.1887],
        [-0.1293,  0.1511],
        [-0.1631,  0.1906],
        [ 0.1771, -0.1440],
        [-0.1491,  0.1951]], device='cuda:0')
pred: [0 1 1 0 1 0 0 1 0 0 1 0 0 0 0 1 1 0 1 0 0 0 1 0 0 1 0 1 1 1 0 1]
real: 

 31%|███       | 16/52 [00:54<02:00,  3.35s/it]

logits:
tensor([[ 0.1747, -0.1489],
        [ 0.3589, -0.3256],
        [ 0.2099, -0.1535],
        [-0.0068,  0.0568],
        [-0.1273,  0.1490],
        [-0.0887,  0.1236],
        [ 0.2632, -0.1894],
        [-0.1800,  0.1935],
        [-0.1085,  0.1472],
        [-0.0280,  0.0974],
        [ 0.1726, -0.0813],
        [ 0.4184, -0.3634],
        [-0.0372,  0.0576],
        [ 0.0983, -0.0487],
        [ 0.4549, -0.3404],
        [ 0.0916, -0.0394],
        [ 0.2841, -0.1788],
        [ 0.1328, -0.0612],
        [ 0.3736, -0.2509],
        [ 0.3545, -0.2750],
        [-0.1343,  0.1504],
        [ 0.1173, -0.1084],
        [ 0.0536, -0.0181],
        [-0.0580,  0.1071],
        [ 0.3224, -0.2664],
        [ 0.2105, -0.1453],
        [ 0.3213, -0.2623],
        [-0.1678,  0.1895],
        [ 0.4504, -0.4083],
        [-0.0378,  0.0962],
        [-0.0909,  0.1140],
        [ 0.1337, -0.0906]], device='cuda:0')
pred: [0 0 0 1 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 1 1 0]
real: 

 33%|███▎      | 17/52 [00:57<01:57,  3.37s/it]

logits:
tensor([[-0.1346,  0.1489],
        [-0.0548,  0.0503],
        [ 0.4726, -0.4455],
        [ 0.4240, -0.2155],
        [ 0.0593, -0.0877],
        [ 0.1504, -0.1017],
        [-0.1295,  0.1427],
        [ 0.3898, -0.4087],
        [-0.2244,  0.2392],
        [ 0.0174,  0.0272],
        [ 0.3239, -0.2922],
        [ 0.0176,  0.0425],
        [ 0.4088, -0.3262],
        [ 0.1182, -0.0490],
        [-0.1358,  0.1365],
        [-0.0241,  0.0823],
        [-0.0683,  0.1147],
        [ 0.2072, -0.1636],
        [ 0.1920, -0.1322],
        [ 0.0449, -0.0218],
        [-0.0724,  0.1161],
        [ 0.2686, -0.1456],
        [-0.1242,  0.1558],
        [ 0.0372,  0.0255],
        [ 0.3137, -0.2192],
        [ 0.2760, -0.1929],
        [ 0.3818, -0.3771],
        [-0.1536,  0.1910],
        [ 0.3877, -0.2822],
        [-0.1470,  0.1611],
        [ 0.1122, -0.0392],
        [ 0.4642, -0.3923]], device='cuda:0')
pred: [1 1 0 0 0 0 1 0 1 1 0 1 0 0 1 1 1 0 0 0 1 0 1 0 0 0 0 1 0 1 0 0]
real: 

 35%|███▍      | 18/52 [01:00<01:53,  3.33s/it]

logits:
tensor([[ 3.3271e-01, -2.3072e-01],
        [-1.4571e-02,  4.8058e-02],
        [ 1.7682e-01, -1.4783e-01],
        [-5.7255e-02,  9.6092e-02],
        [-6.8607e-02,  5.7765e-02],
        [ 1.8456e-01, -1.4119e-01],
        [ 3.0125e-01, -2.0867e-01],
        [-1.9203e-01,  1.9605e-01],
        [ 3.0660e-01, -2.3131e-01],
        [ 3.6784e-01, -3.1253e-01],
        [-4.4949e-02,  7.3757e-02],
        [-1.6881e-01,  1.9571e-01],
        [ 2.6225e-01, -2.2836e-01],
        [ 1.7352e-02, -2.6334e-02],
        [ 3.3096e-01, -2.8480e-01],
        [-1.8058e-01,  2.3718e-01],
        [ 1.4036e-01, -6.0960e-02],
        [-6.1513e-02,  1.1864e-01],
        [ 1.5039e-02,  3.5124e-02],
        [-3.7380e-04,  3.1939e-02],
        [-1.3891e-01,  1.7775e-01],
        [ 3.3779e-01, -2.6602e-01],
        [-1.5270e-01,  1.3130e-01],
        [ 2.7720e-01, -2.1249e-01],
        [-1.5317e-02,  6.3840e-02],
        [-5.5431e-02,  1.0915e-01],
        [ 4.0358e-01, -4.0476e-01],
        [ 1.0803e-01

 37%|███▋      | 19/52 [01:04<01:50,  3.35s/it]

logits:
tensor([[-0.1289,  0.1098],
        [ 0.0276,  0.0061],
        [ 0.3459, -0.2565],
        [ 0.0101,  0.0398],
        [ 0.2045, -0.1481],
        [-0.1040,  0.1371],
        [ 0.4027, -0.3298],
        [-0.0024,  0.0443],
        [-0.0993,  0.1019],
        [ 0.0043,  0.0474],
        [ 0.2986, -0.1955],
        [ 0.4781, -0.4588],
        [ 0.4973, -0.4600],
        [ 0.4839, -0.4098],
        [-0.0414,  0.1025],
        [ 0.3863, -0.2235],
        [ 0.1999, -0.1469],
        [ 0.0399,  0.0277],
        [-0.0630,  0.0951],
        [-0.1050,  0.1166],
        [ 0.1100, -0.0376],
        [ 0.2864, -0.1957],
        [ 0.2010, -0.1631],
        [ 0.2886, -0.2174],
        [ 0.3115, -0.2726],
        [ 0.1276, -0.0766],
        [ 0.0783, -0.0438],
        [ 0.2225, -0.1701],
        [-0.1452,  0.1670],
        [-0.0913,  0.0851],
        [ 0.2181, -0.1315],
        [-0.0177,  0.0515]], device='cuda:0')
pred: [1 0 0 1 0 1 0 1 1 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1]
real: 

 38%|███▊      | 20/52 [01:07<01:48,  3.39s/it]

logits:
tensor([[ 0.2211, -0.1480],
        [ 0.2758, -0.1644],
        [-0.0546,  0.0731],
        [-0.1482,  0.1697],
        [ 0.0193,  0.0256],
        [ 0.0737,  0.0087],
        [-0.1417,  0.1646],
        [ 0.3868, -0.3100],
        [ 0.3465, -0.2382],
        [ 0.3924, -0.2442],
        [ 0.3969, -0.4114],
        [ 0.3882, -0.3403],
        [-0.0434,  0.0815],
        [-0.1271,  0.1216],
        [-0.0319,  0.0447],
        [ 0.0935, -0.0648],
        [-0.0874,  0.1441],
        [ 0.3643, -0.3476],
        [-0.0112,  0.0685],
        [-0.0449,  0.0950],
        [ 0.2060, -0.1243],
        [-0.0941,  0.0889],
        [ 0.1141, -0.1014],
        [ 0.0604, -0.0217],
        [ 0.2127, -0.1896],
        [ 0.3220, -0.2210],
        [ 0.1263, -0.1017],
        [-0.0929,  0.0746],
        [ 0.1243, -0.0087],
        [ 0.0129,  0.0386],
        [ 0.1864, -0.1664],
        [ 0.2660, -0.3128]], device='cuda:0')
pred: [0 0 1 1 1 0 1 0 0 0 0 0 1 1 1 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0]
real: 

 40%|████      | 21/52 [01:10<01:44,  3.37s/it]

logits:
tensor([[ 0.0791, -0.0057],
        [ 0.3369, -0.2669],
        [ 0.2755, -0.1446],
        [ 0.1306, -0.0850],
        [ 0.1668, -0.1139],
        [ 0.4801, -0.3958],
        [ 0.5130, -0.4512],
        [-0.0272,  0.0384],
        [ 0.2796, -0.1242],
        [ 0.3535, -0.2579],
        [ 0.3839, -0.2755],
        [ 0.5170, -0.4233],
        [ 0.1587, -0.1290],
        [-0.0254,  0.0523],
        [ 0.0375, -0.0284],
        [ 0.3904, -0.3074],
        [ 0.0439, -0.0082],
        [-0.1278,  0.1496],
        [ 0.0528,  0.0149],
        [ 0.1732, -0.0891],
        [-0.0339,  0.0307],
        [ 0.1239, -0.0676],
        [-0.1337,  0.1944],
        [-0.1351,  0.1912],
        [ 0.0374,  0.0448],
        [ 0.1738, -0.0733],
        [ 0.4076, -0.3755],
        [ 0.4391, -0.3484],
        [ 0.1419, -0.0924],
        [ 0.0962, -0.0514],
        [ 0.0708, -0.0253],
        [-0.0451,  0.1033]], device='cuda:0')
pred: [0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 1 1 0 0 0 0 0 0 1]
real: 

 42%|████▏     | 22/52 [01:14<01:42,  3.41s/it]

logits:
tensor([[ 0.3968, -0.3326],
        [ 0.0284,  0.0184],
        [-0.0622,  0.0802],
        [-0.1725,  0.1986],
        [-0.1373,  0.1554],
        [-0.0157,  0.0700],
        [ 0.2635, -0.1371],
        [-0.1637,  0.1988],
        [ 0.1198, -0.0529],
        [ 0.4312, -0.3905],
        [ 0.1394, -0.0447],
        [ 0.0402, -0.0258],
        [-0.1773,  0.1991],
        [ 0.1420, -0.0744],
        [ 0.4175, -0.3132],
        [ 0.1386, -0.0707],
        [ 0.3240, -0.2180],
        [-0.1651,  0.2062],
        [-0.0291,  0.0989],
        [-0.2506,  0.2596],
        [ 0.4146, -0.2742],
        [ 0.4354, -0.3824],
        [ 0.4261, -0.3512],
        [ 0.1614, -0.1118],
        [ 0.1705, -0.0725],
        [ 0.3607, -0.3375],
        [ 0.3189, -0.2617],
        [-0.0861,  0.0801],
        [-0.0699,  0.0832],
        [ 0.3997, -0.3067],
        [ 0.0045,  0.0437],
        [ 0.0681,  0.0056]], device='cuda:0')
pred: [0 0 1 1 1 1 0 1 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 1 0]
real: 

 44%|████▍     | 23/52 [01:17<01:39,  3.42s/it]

logits:
tensor([[ 0.3192, -0.2373],
        [ 0.3273, -0.2593],
        [ 0.0288,  0.0174],
        [ 0.3692, -0.2484],
        [-0.1711,  0.1949],
        [ 0.3368, -0.2634],
        [ 0.4364, -0.3627],
        [ 0.0287,  0.0387],
        [-0.0909,  0.1067],
        [ 0.4147, -0.3402],
        [-0.1472,  0.1241],
        [ 0.1213, -0.0861],
        [ 0.4060, -0.4034],
        [-0.0570,  0.0635],
        [-0.1228,  0.1553],
        [ 0.2591, -0.1398],
        [ 0.0575,  0.0284],
        [ 0.2011, -0.1392],
        [ 0.0165, -0.0164],
        [-0.0020,  0.0552],
        [-0.0033,  0.0506],
        [-0.1629,  0.1946],
        [-0.1076,  0.1571],
        [ 0.3532, -0.2881],
        [-0.1545,  0.1570],
        [ 0.1108, -0.0333],
        [-0.0735,  0.1434],
        [ 0.4432, -0.3119],
        [ 0.0488,  0.0138],
        [-0.0144,  0.0133],
        [ 0.3268, -0.2619],
        [ 0.3863, -0.2867]], device='cuda:0')
pred: [0 0 0 0 1 0 0 1 1 0 1 0 0 1 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0]
real: 

 46%|████▌     | 24/52 [01:21<01:37,  3.47s/it]

logits:
tensor([[ 0.1749, -0.0767],
        [-0.0277,  0.0585],
        [ 0.4320, -0.3387],
        [-0.1175,  0.1452],
        [ 0.3181, -0.2195],
        [-0.0595,  0.0740],
        [-0.0484,  0.0816],
        [ 0.2701, -0.1446],
        [ 0.3196, -0.2280],
        [ 0.0676, -0.0045],
        [ 0.1656, -0.1283],
        [-0.1412,  0.1868],
        [ 0.2956, -0.2478],
        [ 0.2353, -0.1653],
        [ 0.1332, -0.0660],
        [ 0.2765, -0.1956],
        [-0.0590,  0.0913],
        [ 0.2755, -0.2559],
        [ 0.1439, -0.0837],
        [ 0.0042,  0.0564],
        [ 0.0569, -0.0108],
        [-0.1230,  0.1569],
        [ 0.2366, -0.1220],
        [ 0.0390, -0.0176],
        [ 0.2552, -0.1772],
        [-0.1565,  0.1933],
        [ 0.0982, -0.0531],
        [ 0.2272, -0.1669],
        [ 0.0236,  0.0646],
        [ 0.0100,  0.0563],
        [ 0.3307, -0.2575],
        [ 0.1406, -0.0669]], device='cuda:0')
pred: [0 1 0 1 0 1 1 0 0 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 1 1 0 0]
real: 

 48%|████▊     | 25/52 [01:24<01:32,  3.44s/it]

logits:
tensor([[ 0.0230,  0.0432],
        [ 0.2735, -0.1835],
        [ 0.1923, -0.1408],
        [ 0.1313, -0.0849],
        [ 0.1250, -0.0692],
        [ 0.1327, -0.0645],
        [ 0.5391, -0.5126],
        [ 0.1242, -0.0545],
        [ 0.1209, -0.0339],
        [-0.0830,  0.1436],
        [-0.0344,  0.0859],
        [ 0.4254, -0.3679],
        [-0.2018,  0.1799],
        [ 0.0957, -0.0345],
        [ 0.3110, -0.2469],
        [ 0.2554, -0.1828],
        [-0.0175,  0.0567],
        [ 0.5020, -0.4001],
        [ 0.3367, -0.2315],
        [ 0.0122,  0.0453],
        [ 0.0010,  0.0493],
        [ 0.0140, -0.0022],
        [ 0.2121, -0.1460],
        [ 0.1992, -0.1552],
        [ 0.1444, -0.0510],
        [ 0.0344,  0.0183],
        [-0.0243,  0.0590],
        [ 0.2205, -0.1258],
        [ 0.3542, -0.2915],
        [-0.1260,  0.1678],
        [ 0.3470, -0.1823],
        [ 0.2230, -0.1327]], device='cuda:0')
pred: [1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 1 0 0]
real: 

 50%|█████     | 26/52 [01:28<01:28,  3.40s/it]

logits:
tensor([[ 0.1166, -0.0856],
        [-0.0536,  0.1327],
        [ 0.3287, -0.2369],
        [ 0.0639, -0.0527],
        [-0.1967,  0.2113],
        [ 0.1634, -0.1390],
        [ 0.3596, -0.2144],
        [ 0.4284, -0.2999],
        [ 0.1736, -0.1384],
        [-0.0334,  0.0776],
        [ 0.3944, -0.3280],
        [ 0.4275, -0.3406],
        [ 0.1551, -0.0474],
        [-0.1567,  0.1834],
        [-0.0026,  0.0227],
        [ 0.1290, -0.0532],
        [-0.1348,  0.1530],
        [-0.1642,  0.1650],
        [ 0.1042, -0.0215],
        [ 0.0244,  0.0278],
        [ 0.4461, -0.3262],
        [ 0.2258, -0.1411],
        [-0.0917,  0.1348],
        [-0.0449,  0.0652],
        [ 0.3872, -0.3538],
        [ 0.0586, -0.0103],
        [ 0.0403,  0.0331],
        [-0.1335,  0.1952],
        [ 0.1242, -0.0947],
        [ 0.5042, -0.4139],
        [ 0.4921, -0.4359],
        [-0.1715,  0.1958]], device='cuda:0')
pred: [0 1 0 0 1 0 0 0 0 1 0 0 0 1 1 0 1 1 0 1 0 0 1 1 0 0 0 1 0 0 0 1]
real: 

 52%|█████▏    | 27/52 [01:31<01:25,  3.40s/it]

logits:
tensor([[ 0.0937, -0.0531],
        [ 0.3868, -0.3337],
        [ 0.0408,  0.0738],
        [ 0.1475, -0.0840],
        [-0.1882,  0.1945],
        [ 0.2905, -0.2595],
        [ 0.3092, -0.2464],
        [-0.1892,  0.1931],
        [ 0.0991, -0.0111],
        [ 0.2259, -0.2028],
        [ 0.4941, -0.4423],
        [-0.1446,  0.1639],
        [ 0.2020, -0.1264],
        [ 0.1551, -0.0780],
        [ 0.2731, -0.1681],
        [ 0.0359, -0.0180],
        [ 0.1272, -0.1066],
        [ 0.2946, -0.2402],
        [-0.0714,  0.1673],
        [-0.1596,  0.2052],
        [ 0.2056, -0.1441],
        [ 0.1198, -0.0856],
        [ 0.1183, -0.0599],
        [ 0.4046, -0.3151],
        [ 0.4689, -0.3859],
        [ 0.1548, -0.0892],
        [ 0.2088, -0.1746],
        [ 0.0269,  0.0531],
        [ 0.2657, -0.1578],
        [ 0.3701, -0.2620],
        [ 0.2878, -0.2088],
        [ 0.4465, -0.3526]], device='cuda:0')
pred: [0 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0]
real: 

 54%|█████▍    | 28/52 [01:35<01:21,  3.42s/it]

logits:
tensor([[ 5.0311e-02, -1.6739e-02],
        [ 1.9156e-01, -1.5419e-01],
        [ 2.5933e-01, -1.2479e-01],
        [ 2.5010e-01, -1.5373e-01],
        [-1.1640e-01,  1.5802e-01],
        [ 4.9353e-01, -4.1291e-01],
        [ 1.7549e-01, -1.4261e-01],
        [ 2.2290e-01, -1.7386e-01],
        [ 3.3229e-01, -3.1493e-01],
        [ 2.6591e-01, -1.7867e-01],
        [-5.3146e-03,  5.7353e-02],
        [-8.6420e-02,  1.4585e-01],
        [-1.9700e-01,  1.8919e-01],
        [ 4.5117e-01, -3.4559e-01],
        [ 1.6907e-01, -1.5584e-01],
        [-1.3090e-01,  1.6191e-01],
        [ 2.2349e-01, -1.6436e-01],
        [-1.9395e-01,  1.9899e-01],
        [ 3.6020e-01, -3.0164e-01],
        [-9.6485e-02,  1.7919e-01],
        [-1.6661e-01,  1.6853e-01],
        [-1.7722e-01,  1.8055e-01],
        [ 7.3596e-02, -5.1052e-02],
        [ 1.4969e-01, -9.9444e-02],
        [ 2.5632e-01, -2.0982e-01],
        [ 4.7968e-02, -7.5865e-03],
        [ 3.0097e-01, -1.9059e-01],
        [ 1.0531e-01

 56%|█████▌    | 29/52 [01:38<01:18,  3.40s/it]

logits:
tensor([[ 0.4727, -0.4463],
        [ 0.4100, -0.3514],
        [ 0.0539, -0.0087],
        [-0.0138,  0.0329],
        [ 0.3792, -0.2940],
        [-0.1589,  0.1960],
        [-0.1624,  0.2143],
        [ 0.1196, -0.0608],
        [ 0.2207, -0.1637],
        [ 0.1122, -0.0594],
        [-0.1372,  0.1443],
        [-0.0647,  0.1040],
        [-0.0200,  0.0838],
        [ 0.2161, -0.1730],
        [ 0.1719, -0.1014],
        [-0.0780,  0.0968],
        [-0.0203,  0.0384],
        [ 0.1456, -0.0810],
        [ 0.2057, -0.1056],
        [-0.0164,  0.0848],
        [ 0.4715, -0.3617],
        [-0.1454,  0.1875],
        [ 0.0094,  0.0358],
        [ 0.0684, -0.0461],
        [ 0.0143,  0.0290],
        [ 0.0259,  0.0286],
        [-0.0060,  0.0924],
        [-0.0138,  0.0330],
        [ 0.2380, -0.0974],
        [ 0.3882, -0.2488],
        [ 0.2551, -0.1603],
        [-0.1625,  0.1756]], device='cuda:0')
pred: [0 0 0 1 0 1 1 0 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 0 1 1 1 1 0 0 0 1]
real: 

 58%|█████▊    | 30/52 [01:41<01:13,  3.35s/it]

logits:
tensor([[ 0.0187,  0.0475],
        [ 0.3944, -0.2769],
        [ 0.4334, -0.3394],
        [-0.1511,  0.1900],
        [-0.1121,  0.1350],
        [-0.0719,  0.1242],
        [-0.0999,  0.1291],
        [ 0.4420, -0.4221],
        [-0.1853,  0.1986],
        [ 0.4137, -0.4210],
        [-0.1529,  0.2033],
        [ 0.4361, -0.3516],
        [-0.0238,  0.0195],
        [ 0.2310, -0.1139],
        [ 0.2246, -0.1936],
        [ 0.0255,  0.0271],
        [ 0.2620, -0.1553],
        [-0.2139,  0.2269],
        [ 0.2547, -0.1632],
        [-0.0352,  0.0927],
        [ 0.0243, -0.0007],
        [-0.1553,  0.2033],
        [-0.2470,  0.2602],
        [-0.1644,  0.1764],
        [ 0.4263, -0.3668],
        [-0.2155,  0.2391],
        [ 0.4174, -0.2998],
        [-0.0039,  0.0532],
        [ 0.1137, -0.0866],
        [ 0.3510, -0.2770],
        [-0.1667,  0.1744],
        [ 0.4494, -0.3803]], device='cuda:0')
pred: [1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 1 1 0 1 0 1 0 0 1 0]
real: 

 60%|█████▉    | 31/52 [01:44<01:10,  3.36s/it]

logits:
tensor([[-0.1558,  0.1269],
        [-0.0186,  0.0454],
        [ 0.0438,  0.0525],
        [ 0.4227, -0.3446],
        [ 0.3815, -0.2635],
        [ 0.2470, -0.1096],
        [-0.0478,  0.0983],
        [ 0.2066, -0.1458],
        [ 0.4432, -0.4552],
        [ 0.1147, -0.0662],
        [ 0.2855, -0.2017],
        [ 0.0334, -0.0125],
        [ 0.4847, -0.3972],
        [ 0.1487, -0.0805],
        [-0.1294,  0.1762],
        [-0.0062,  0.0707],
        [ 0.0037,  0.0079],
        [ 0.3870, -0.3191],
        [-0.0316,  0.0676],
        [ 0.2763, -0.1626],
        [ 0.1372, -0.0638],
        [ 0.4611, -0.4020],
        [-0.0502,  0.1135],
        [ 0.2004, -0.2003],
        [ 0.4392, -0.3362],
        [ 0.2402, -0.1790],
        [ 0.1882, -0.1232],
        [ 0.0336, -0.0077],
        [-0.1231,  0.2017],
        [-0.0013,  0.0452],
        [ 0.1271, -0.0414],
        [-0.1721,  0.1737]], device='cuda:0')
pred: [1 1 1 0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 0 0 0 0 0 1 1 0 1]
real: 

 62%|██████▏   | 32/52 [01:48<01:07,  3.36s/it]

logits:
tensor([[-0.0144,  0.0743],
        [-0.1673,  0.1445],
        [-0.1386,  0.1666],
        [ 0.5130, -0.4134],
        [ 0.4626, -0.3406],
        [ 0.0011,  0.0581],
        [ 0.2440, -0.1347],
        [-0.1890,  0.1995],
        [-0.1291,  0.1609],
        [ 0.0979, -0.0534],
        [ 0.1099, -0.0597],
        [ 0.2321, -0.1757],
        [ 0.3745, -0.2608],
        [ 0.4453, -0.4296],
        [ 0.0476, -0.0069],
        [-0.0351,  0.0809],
        [ 0.1241, -0.0446],
        [ 0.3473, -0.2412],
        [ 0.2856, -0.1735],
        [-0.1219,  0.1540],
        [ 0.4559, -0.3746],
        [-0.0552,  0.0881],
        [-0.0120,  0.0225],
        [-0.0232,  0.0590],
        [ 0.4794, -0.4318],
        [ 0.1114, -0.1037],
        [ 0.2174, -0.1535],
        [-0.1266,  0.1723],
        [-0.0267,  0.0522],
        [-0.1862,  0.1903],
        [ 0.0571, -0.0360],
        [-0.1453,  0.1425]], device='cuda:0')
pred: [1 1 1 0 0 1 0 1 1 0 0 0 0 0 0 1 0 0 0 1 0 1 1 1 0 0 0 1 1 1 0 1]
real: 

 63%|██████▎   | 33/52 [01:51<01:03,  3.34s/it]

logits:
tensor([[ 0.1283, -0.0815],
        [-0.0016,  0.0462],
        [ 0.1708, -0.1200],
        [ 0.1947, -0.1479],
        [ 0.1138, -0.0519],
        [-0.1355,  0.1428],
        [ 0.5238, -0.4768],
        [ 0.1523, -0.0757],
        [ 0.4030, -0.3633],
        [ 0.0245,  0.0233],
        [ 0.1562, -0.1255],
        [ 0.4503, -0.4414],
        [-0.1769,  0.1986],
        [ 0.3825, -0.3506],
        [ 0.1757, -0.1521],
        [ 0.0549, -0.0242],
        [ 0.1314, -0.0938],
        [-0.0701,  0.0987],
        [ 0.3843, -0.2808],
        [ 0.4044, -0.3347],
        [ 0.4485, -0.3841],
        [-0.0813,  0.0949],
        [ 0.2646, -0.1664],
        [-0.1449,  0.1870],
        [ 0.1960, -0.1097],
        [ 0.0546,  0.0348],
        [ 0.3667, -0.3437],
        [ 0.4247, -0.2519],
        [ 0.4660, -0.4254],
        [-0.1463,  0.1497],
        [ 0.3622, -0.2070],
        [-0.2178,  0.2216]], device='cuda:0')
pred: [0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 1]
real: 

 65%|██████▌   | 34/52 [01:54<00:59,  3.28s/it]

logits:
tensor([[ 0.5150, -0.4320],
        [ 0.2267, -0.1326],
        [ 0.3792, -0.2818],
        [ 0.3711, -0.2999],
        [ 0.4545, -0.3721],
        [ 0.0264,  0.0286],
        [-0.1220,  0.1665],
        [ 0.2913, -0.2583],
        [ 0.0867, -0.0204],
        [ 0.2564, -0.1931],
        [ 0.2142, -0.1300],
        [ 0.2724, -0.1921],
        [ 0.4554, -0.3770],
        [ 0.0766, -0.0420],
        [-0.0270,  0.0598],
        [-0.1465,  0.1966],
        [ 0.1876, -0.1815],
        [ 0.3896, -0.3022],
        [-0.2106,  0.2085],
        [-0.1683,  0.1907],
        [-0.1672,  0.2041],
        [ 0.2000, -0.1563],
        [-0.1350,  0.1568],
        [-0.0081, -0.0043],
        [-0.0620,  0.1098],
        [ 0.4516, -0.3810],
        [ 0.1534, -0.0977],
        [ 0.5459, -0.4521],
        [ 0.4700, -0.4068],
        [ 0.3196, -0.2451],
        [ 0.4894, -0.4621],
        [ 0.1846, -0.1083]], device='cuda:0')
pred: [0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 1 1 1 0 0 0 0 0 0 0]
real: 

 67%|██████▋   | 35/52 [01:57<00:55,  3.25s/it]

logits:
tensor([[-0.0807,  0.1029],
        [-0.2120,  0.2174],
        [-0.1541,  0.1918],
        [-0.0322,  0.1004],
        [-0.1542,  0.1698],
        [ 0.0914, -0.0155],
        [ 0.0494, -0.0276],
        [ 0.1965, -0.1310],
        [-0.0797,  0.0733],
        [-0.1146,  0.1561],
        [ 0.1228, -0.1325],
        [ 0.1420, -0.0533],
        [ 0.1011, -0.0875],
        [ 0.5359, -0.4560],
        [-0.0021,  0.0776],
        [ 0.1137, -0.0821],
        [-0.1765,  0.1934],
        [ 0.1864, -0.1538],
        [-0.0738,  0.0798],
        [ 0.4361, -0.3320],
        [ 0.4121, -0.3203],
        [ 0.2111, -0.1278],
        [ 0.2863, -0.2304],
        [ 0.3937, -0.3459],
        [ 0.1037, -0.0845],
        [ 0.3219, -0.2453],
        [ 0.1844, -0.1853],
        [ 0.0135,  0.0027],
        [-0.0108,  0.0608],
        [ 0.0897, -0.0480],
        [ 0.1777, -0.1098],
        [ 0.3309, -0.2418]], device='cuda:0')
pred: [1 1 1 1 1 0 0 0 1 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0]
real: 

 69%|██████▉   | 36/52 [02:01<00:52,  3.28s/it]

logits:
tensor([[ 0.1208, -0.0376],
        [-0.1150,  0.1434],
        [ 0.0322, -0.0057],
        [ 0.4443, -0.3260],
        [ 0.2402, -0.1180],
        [ 0.4082, -0.3776],
        [ 0.0138,  0.0214],
        [-0.0254,  0.0541],
        [ 0.2850, -0.1865],
        [ 0.3196, -0.2089],
        [-0.0299,  0.0769],
        [ 0.1030, -0.0676],
        [ 0.1687, -0.1486],
        [ 0.3401, -0.2456],
        [ 0.3507, -0.2634],
        [ 0.1487, -0.1154],
        [ 0.0041,  0.0507],
        [ 0.1745, -0.1457],
        [ 0.1662, -0.1056],
        [ 0.2337, -0.1408],
        [ 0.2998, -0.2003],
        [ 0.1760, -0.1501],
        [ 0.3487, -0.2821],
        [ 0.0164,  0.0253],
        [ 0.0494, -0.0071],
        [-0.1003,  0.1325],
        [ 0.1204, -0.1552],
        [-0.0232,  0.0839],
        [ 0.4249, -0.3876],
        [ 0.5020, -0.4308],
        [ 0.0493,  0.0143],
        [ 0.0934, -0.0400]], device='cuda:0')
pred: [0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0]
real: 

 71%|███████   | 37/52 [02:04<00:48,  3.26s/it]

logits:
tensor([[-0.1827,  0.2063],
        [ 0.4342, -0.3656],
        [ 0.2270, -0.1906],
        [-0.0204,  0.0580],
        [ 0.3352, -0.2326],
        [ 0.5022, -0.4375],
        [ 0.3254, -0.1929],
        [ 0.0608,  0.0006],
        [-0.1149,  0.1709],
        [ 0.4131, -0.4035],
        [-0.0821,  0.1444],
        [-0.0936,  0.1521],
        [-0.1245,  0.1644],
        [-0.0569,  0.0824],
        [-0.2100,  0.2192],
        [ 0.2655, -0.2204],
        [-0.0078,  0.0699],
        [ 0.1584, -0.1282],
        [ 0.2559, -0.1839],
        [ 0.0060,  0.0147],
        [-0.1213,  0.1400],
        [ 0.1191, -0.0768],
        [-0.0820,  0.1142],
        [ 0.4468, -0.2899],
        [ 0.3271, -0.2185],
        [ 0.1023, -0.0202],
        [ 0.3993, -0.3728],
        [ 0.1363, -0.0821],
        [ 0.2278, -0.1025],
        [ 0.2550, -0.1770],
        [ 0.2875, -0.1924],
        [-0.1348,  0.1972]], device='cuda:0')
pred: [1 0 0 1 0 0 0 0 1 0 1 1 1 1 1 0 1 0 0 1 1 0 1 0 0 0 0 0 0 0 0 1]
real: 

 73%|███████▎  | 38/52 [02:07<00:45,  3.25s/it]

logits:
tensor([[ 0.0172,  0.0329],
        [ 0.2932, -0.2023],
        [ 0.0161,  0.0232],
        [ 0.2152, -0.0934],
        [ 0.0924, -0.0954],
        [-0.1498,  0.2032],
        [-0.1047,  0.0887],
        [ 0.3710, -0.3792],
        [ 0.1582, -0.1389],
        [-0.1445,  0.1701],
        [ 0.2294, -0.1706],
        [ 0.2706, -0.2152],
        [ 0.5238, -0.4233],
        [ 0.1585, -0.0384],
        [ 0.0491, -0.0266],
        [ 0.1021, -0.0436],
        [-0.0615,  0.1109],
        [-0.1546,  0.1749],
        [ 0.1128, -0.0823],
        [ 0.4524, -0.3138],
        [-0.1606,  0.1756],
        [ 0.1355, -0.0677],
        [-0.0916,  0.1285],
        [-0.1424,  0.1631],
        [-0.1316,  0.1626],
        [ 0.2602, -0.1475],
        [ 0.3082, -0.2794],
        [ 0.0675, -0.0173],
        [ 0.0797, -0.0108],
        [ 0.2510, -0.2058],
        [-0.1331,  0.1380],
        [ 0.2706, -0.1973]], device='cuda:0')
pred: [1 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 1 1 1 0 0 0 0 0 1 0]
real: 

 75%|███████▌  | 39/52 [02:10<00:41,  3.23s/it]

logits:
tensor([[ 0.0218,  0.0149],
        [ 0.2874, -0.2450],
        [ 0.3951, -0.3751],
        [-0.0848,  0.1110],
        [-0.2011,  0.2240],
        [-0.1759,  0.2026],
        [ 0.1230, -0.0008],
        [ 0.3686, -0.2965],
        [ 0.0166,  0.0365],
        [ 0.4274, -0.3093],
        [ 0.0468,  0.0263],
        [-0.1198,  0.1514],
        [ 0.1713, -0.1498],
        [ 0.1168, -0.0952],
        [ 0.3173, -0.2610],
        [ 0.4254, -0.3910],
        [ 0.1687, -0.0989],
        [ 0.4177, -0.3427],
        [-0.1684,  0.2187],
        [ 0.2061, -0.1421],
        [-0.0481,  0.0732],
        [-0.1927,  0.2102],
        [ 0.1017, -0.0518],
        [ 0.1254, -0.0996],
        [ 0.4790, -0.3803],
        [-0.0031,  0.0360],
        [ 0.4166, -0.3327],
        [-0.2234,  0.2398],
        [-0.1635,  0.1952],
        [ 0.0468,  0.0019],
        [ 0.1121, -0.0829],
        [ 0.0515, -0.0096]], device='cuda:0')
pred: [0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 0 1 0 1 1 0 0 0]
real: 

 77%|███████▋  | 40/52 [02:14<00:38,  3.21s/it]

logits:
tensor([[ 0.1749, -0.1088],
        [-0.1644,  0.2092],
        [-0.1158,  0.1328],
        [-0.1404,  0.1688],
        [-0.0730,  0.0686],
        [ 0.4509, -0.3744],
        [ 0.3507, -0.2152],
        [ 0.2480, -0.1923],
        [-0.1346,  0.1819],
        [ 0.4026, -0.3396],
        [-0.0346,  0.0724],
        [ 0.3212, -0.2157],
        [-0.1237,  0.1746],
        [ 0.1705, -0.1012],
        [-0.1717,  0.1681],
        [ 0.0819, -0.0473],
        [ 0.3516, -0.2403],
        [-0.1373,  0.1520],
        [ 0.1860, -0.1208],
        [ 0.4076, -0.3248],
        [ 0.1550, -0.0982],
        [ 0.3818, -0.2949],
        [-0.1348,  0.1374],
        [-0.2209,  0.2239],
        [-0.0260,  0.0759],
        [ 0.3448, -0.2069],
        [-0.1369,  0.1162],
        [ 0.1881, -0.0955],
        [ 0.0941,  0.0020],
        [ 0.2741, -0.2029],
        [-0.1196,  0.1235],
        [ 0.0685, -0.0510]], device='cuda:0')
pred: [0 1 1 1 1 0 0 0 1 0 1 0 1 0 1 0 0 1 0 0 0 0 1 1 1 0 1 0 0 0 1 0]
real: 

 79%|███████▉  | 41/52 [02:17<00:34,  3.18s/it]

logits:
tensor([[ 0.4975, -0.4143],
        [ 0.4676, -0.4147],
        [-0.1797,  0.2132],
        [ 0.3534, -0.2589],
        [ 0.1559, -0.1094],
        [ 0.2536, -0.1711],
        [ 0.4125, -0.2573],
        [ 0.0940, -0.0610],
        [-0.0115,  0.0748],
        [-0.1218,  0.1139],
        [ 0.1386, -0.0766],
        [ 0.0657, -0.0162],
        [-0.1751,  0.2008],
        [-0.0706,  0.1089],
        [ 0.2558, -0.2043],
        [-0.1834,  0.1957],
        [-0.0372,  0.1271],
        [-0.1550,  0.1712],
        [-0.1170,  0.1197],
        [ 0.4173, -0.2953],
        [-0.1249,  0.1407],
        [-0.1943,  0.2018],
        [ 0.3679, -0.2215],
        [-0.1374,  0.1307],
        [ 0.0764,  0.0182],
        [ 0.2522, -0.2059],
        [-0.1661,  0.1743],
        [ 0.2931, -0.1662],
        [ 0.1837, -0.1515],
        [ 0.1674, -0.0923],
        [ 0.4055, -0.2957],
        [ 0.1534, -0.0771]], device='cuda:0')
pred: [0 0 1 0 0 0 0 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 0 1 0 0 1 0 0 0 0 0]
real: 

 81%|████████  | 42/52 [02:20<00:31,  3.20s/it]

logits:
tensor([[-0.0034,  0.0694],
        [ 0.0897, -0.1006],
        [ 0.0056,  0.0641],
        [-0.0336,  0.0736],
        [ 0.3669, -0.2431],
        [ 0.1478, -0.0729],
        [ 0.4219, -0.3703],
        [ 0.2788, -0.2144],
        [-0.1243,  0.1638],
        [ 0.2329, -0.1453],
        [-0.1936,  0.2150],
        [ 0.1534, -0.0741],
        [-0.1031,  0.1640],
        [-0.1186,  0.1570],
        [-0.0932,  0.1782],
        [ 0.4299, -0.3550],
        [ 0.0822, -0.0569],
        [-0.0532,  0.0872],
        [-0.1178,  0.1599],
        [ 0.0428,  0.0151],
        [ 0.1771, -0.1424],
        [ 0.2156, -0.1567],
        [-0.1732,  0.1741],
        [ 0.1253, -0.0611],
        [ 0.1046,  0.0026],
        [ 0.2774, -0.2404],
        [-0.1553,  0.1411],
        [ 0.1823, -0.1228],
        [ 0.0938, -0.0653],
        [ 0.4762, -0.3545],
        [ 0.0899,  0.0024],
        [ 0.0262,  0.0124]], device='cuda:0')
pred: [1 0 1 1 0 0 0 0 1 0 1 0 1 1 1 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0]
real: 

 83%|████████▎ | 43/52 [02:23<00:28,  3.15s/it]

logits:
tensor([[-0.0895,  0.1495],
        [ 0.2915, -0.1846],
        [-0.1829,  0.1817],
        [ 0.3490, -0.3120],
        [ 0.2301, -0.1534],
        [-0.1555,  0.1703],
        [-0.0110,  0.0669],
        [-0.1103,  0.1261],
        [-0.1329,  0.1636],
        [-0.1417,  0.2020],
        [-0.0850,  0.1452],
        [-0.1549,  0.1981],
        [-0.2003,  0.2029],
        [-0.0254,  0.0489],
        [-0.0905,  0.1393],
        [ 0.0582, -0.0133],
        [-0.1447,  0.1878],
        [-0.1446,  0.1708],
        [-0.0784,  0.0921],
        [-0.0216,  0.0643],
        [ 0.1456, -0.0875],
        [-0.1285,  0.1498],
        [ 0.0897, -0.0287],
        [-0.1395,  0.1492],
        [-0.0314,  0.0429],
        [-0.1092,  0.1612],
        [-0.1527,  0.1780],
        [ 0.2928, -0.2087],
        [-0.1792,  0.1800],
        [ 0.4007, -0.2820],
        [ 0.0774,  0.0067],
        [-0.1353,  0.1646]], device='cuda:0')
pred: [1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 0 1 0 0 1]
real: 

 85%|████████▍ | 44/52 [02:26<00:25,  3.19s/it]

logits:
tensor([[-5.3259e-02,  8.2472e-02],
        [ 2.9594e-01, -1.9980e-01],
        [-1.7059e-01,  2.0507e-01],
        [ 3.0049e-02,  3.2761e-02],
        [-1.4022e-01,  1.3543e-01],
        [ 5.0567e-01, -4.5750e-01],
        [ 8.8724e-02, -3.7186e-02],
        [-6.3318e-02,  9.1125e-02],
        [ 4.0946e-02,  5.4402e-02],
        [ 5.4889e-03,  9.8467e-02],
        [ 4.6466e-02,  2.6053e-02],
        [ 1.8756e-01, -1.3629e-01],
        [ 1.1795e-01, -8.6677e-02],
        [ 1.4175e-01, -4.3930e-02],
        [-7.9571e-02,  9.7927e-02],
        [-2.3661e-02,  6.5112e-02],
        [-1.7628e-01,  2.0100e-01],
        [-1.2308e-01,  1.7954e-01],
        [ 1.8775e-01, -1.6674e-01],
        [ 2.3867e-01, -1.4039e-01],
        [ 3.3271e-01, -2.1183e-01],
        [ 4.1535e-01, -3.7776e-01],
        [-1.2758e-01,  1.6852e-01],
        [ 3.8371e-02, -1.6433e-04],
        [-1.9733e-01,  2.2468e-01],
        [ 4.0837e-01, -3.2198e-01],
        [ 8.6840e-02, -3.5998e-02],
        [ 1.1551e-01

 87%|████████▋ | 45/52 [02:29<00:22,  3.20s/it]

logits:
tensor([[ 0.1332, -0.0771],
        [ 0.1738, -0.0995],
        [ 0.0924, -0.0258],
        [-0.1741,  0.1955],
        [-0.1604,  0.1990],
        [-0.0845,  0.1244],
        [-0.1529,  0.1705],
        [ 0.1764, -0.1127],
        [ 0.3618, -0.2636],
        [ 0.3063, -0.1966],
        [ 0.2877, -0.2274],
        [-0.1594,  0.1627],
        [-0.1697,  0.1714],
        [ 0.1645, -0.1285],
        [ 0.0464, -0.0179],
        [ 0.2227, -0.1133],
        [ 0.0478, -0.0121],
        [-0.1927,  0.2067],
        [ 0.1562, -0.0857],
        [ 0.0134,  0.0143],
        [-0.1998,  0.1981],
        [-0.0775,  0.0864],
        [ 0.2074, -0.0843],
        [ 0.3495, -0.2800],
        [ 0.1039, -0.0367],
        [-0.1203,  0.1360],
        [ 0.2860, -0.1951],
        [ 0.2871, -0.1863],
        [ 0.1202, -0.0731],
        [-0.1912,  0.1921],
        [ 0.4742, -0.4169],
        [ 0.2037, -0.0982]], device='cuda:0')
pred: [0 0 0 1 1 1 1 0 0 0 0 1 1 0 0 0 0 1 0 1 1 1 0 0 0 1 0 0 0 1 0 0]
real: 

 88%|████████▊ | 46/52 [02:33<00:19,  3.21s/it]

logits:
tensor([[ 0.4560, -0.4317],
        [ 0.1999, -0.1159],
        [ 0.0901, -0.0518],
        [-0.1035,  0.1027],
        [ 0.2436, -0.2345],
        [ 0.1358, -0.0480],
        [ 0.4495, -0.4102],
        [ 0.0560, -0.0147],
        [ 0.3456, -0.2679],
        [-0.0904,  0.1322],
        [-0.1304,  0.1574],
        [ 0.0953, -0.0857],
        [ 0.1403, -0.0769],
        [ 0.3903, -0.2612],
        [ 0.4234, -0.2840],
        [ 0.3499, -0.3419],
        [ 0.0120,  0.0044],
        [ 0.4404, -0.3215],
        [ 0.1837, -0.1695],
        [ 0.4513, -0.3787],
        [-0.1389,  0.1819],
        [ 0.0225,  0.0537],
        [ 0.4529, -0.4217],
        [ 0.3572, -0.2761],
        [ 0.4886, -0.4094],
        [ 0.0055,  0.0113],
        [-0.0100,  0.0358],
        [ 0.0410,  0.0037],
        [-0.0492,  0.1048],
        [ 0.0719, -0.0454],
        [ 0.3101, -0.2274],
        [-0.1081,  0.1306]], device='cuda:0')
pred: [0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 1 0 0 1]
real: 

 90%|█████████ | 47/52 [02:36<00:15,  3.14s/it]

logits:
tensor([[ 0.4544, -0.4116],
        [ 0.2070, -0.1443],
        [-0.1815,  0.2043],
        [-0.0345,  0.0711],
        [-0.1297,  0.1290],
        [-0.0109,  0.0982],
        [ 0.2996, -0.2635],
        [ 0.4129, -0.2969],
        [-0.1813,  0.1898],
        [-0.1390,  0.1630],
        [ 0.1188, -0.0435],
        [-0.1128,  0.1658],
        [ 0.0125,  0.0059],
        [ 0.3071, -0.1942],
        [-0.1131,  0.1624],
        [ 0.0418, -0.0065],
        [ 0.2342, -0.1571],
        [-0.1537,  0.1900],
        [ 0.2366, -0.1592],
        [ 0.4930, -0.3803],
        [ 0.3603, -0.3140],
        [ 0.3895, -0.3646],
        [-0.0194,  0.0550],
        [ 0.3493, -0.2055],
        [ 0.4392, -0.4287],
        [ 0.1982, -0.1334],
        [-0.1209,  0.1242],
        [ 0.1726, -0.0902],
        [ 0.1791, -0.1603],
        [-0.1292,  0.1819],
        [ 0.1001, -0.0827],
        [ 0.0094,  0.0125]], device='cuda:0')
pred: [0 0 1 1 1 1 0 0 1 1 0 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 1 0 1]
real: 

 92%|█████████▏| 48/52 [02:39<00:12,  3.13s/it]

logits:
tensor([[ 0.1669, -0.0703],
        [ 0.3686, -0.3528],
        [-0.0114,  0.0555],
        [ 0.2312, -0.1122],
        [-0.1758,  0.2030],
        [-0.0059,  0.0340],
        [ 0.3056, -0.2755],
        [ 0.1751, -0.1132],
        [ 0.4146, -0.3650],
        [ 0.1953, -0.1422],
        [-0.1417,  0.1560],
        [ 0.4242, -0.3548],
        [ 0.0109,  0.0460],
        [ 0.2849, -0.2868],
        [-0.0897,  0.0915],
        [-0.0302,  0.0466],
        [ 0.4039, -0.3426],
        [-0.0747,  0.1162],
        [-0.1941,  0.1945],
        [ 0.4246, -0.3620],
        [-0.0342,  0.0435],
        [ 0.1305, -0.0790],
        [ 0.4263, -0.3644],
        [-0.1164,  0.1622],
        [ 0.1117, -0.0378],
        [-0.1708,  0.1777],
        [ 0.4412, -0.3646],
        [ 0.3945, -0.3324],
        [-0.1558,  0.1484],
        [ 0.1614, -0.0923],
        [ 0.4547, -0.3277],
        [-0.0541,  0.0925]], device='cuda:0')
pred: [0 0 1 0 1 1 0 0 0 0 1 0 1 0 1 1 0 1 1 0 1 0 0 1 0 1 0 0 1 0 0 1]
real: 

 94%|█████████▍| 49/52 [02:42<00:09,  3.16s/it]

logits:
tensor([[-0.0761,  0.1292],
        [-0.0932,  0.1047],
        [-0.0342,  0.0795],
        [ 0.3569, -0.2704],
        [ 0.2622, -0.2202],
        [ 0.3190, -0.2240],
        [-0.1129,  0.1370],
        [ 0.0709, -0.0198],
        [ 0.2391, -0.1728],
        [ 0.2461, -0.2111],
        [-0.1490,  0.1583],
        [ 0.0365, -0.0470],
        [ 0.0027,  0.0110],
        [ 0.0104,  0.0328],
        [ 0.0247, -0.0134],
        [-0.1836,  0.1990],
        [-0.1067,  0.1590],
        [ 0.1766, -0.1457],
        [-0.1354,  0.1699],
        [-0.1286,  0.1424],
        [ 0.1088, -0.0802],
        [-0.1200,  0.1496],
        [-0.0493,  0.1000],
        [ 0.0891, -0.0196],
        [ 0.3445, -0.2836],
        [ 0.1227, -0.0586],
        [ 0.1117, -0.0442],
        [-0.1566,  0.1543],
        [-0.1280,  0.1442],
        [ 0.0468,  0.0013],
        [ 0.1670, -0.0920],
        [ 0.1589, -0.0962]], device='cuda:0')
pred: [1 1 1 0 0 0 1 0 0 0 1 0 1 1 0 1 1 0 1 1 0 1 1 0 0 0 0 1 1 0 0 0]
real: 

 96%|█████████▌| 50/52 [02:45<00:06,  3.19s/it]

logits:
tensor([[-0.0250,  0.0614],
        [ 0.0209, -0.0118],
        [ 0.4534, -0.3429],
        [-0.0808,  0.0980],
        [ 0.1537, -0.1035],
        [-0.0976,  0.1170],
        [-0.2093,  0.2586],
        [ 0.2339, -0.1391],
        [ 0.2413, -0.2009],
        [-0.0178,  0.0385],
        [-0.0966,  0.1681],
        [ 0.3156, -0.2392],
        [ 0.2456, -0.1324],
        [-0.0685,  0.1218],
        [ 0.1479, -0.0397],
        [ 0.2366, -0.1439],
        [-0.1479,  0.1671],
        [ 0.2800, -0.1655],
        [ 0.1632, -0.0879],
        [-0.1313,  0.1594],
        [ 0.0877, -0.0463],
        [-0.0010,  0.0238],
        [-0.1676,  0.1838],
        [ 0.1776, -0.1047],
        [ 0.0511, -0.0101],
        [ 0.0248,  0.0008],
        [ 0.0230, -0.0142],
        [ 0.0352,  0.0071],
        [ 0.0864, -0.0172],
        [ 0.2826, -0.2026],
        [-0.1210,  0.1089],
        [ 0.0515, -0.0515]], device='cuda:0')
pred: [1 0 0 1 0 1 1 0 0 1 1 0 0 1 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 1 0]
real: 

 98%|█████████▊| 51/52 [02:48<00:03,  3.19s/it]

logits:
tensor([[ 0.4093, -0.3212],
        [ 0.1768, -0.0792],
        [ 0.3792, -0.3851],
        [-0.1127,  0.1410],
        [ 0.2301, -0.1777],
        [ 0.1870, -0.1308],
        [-0.1601,  0.1771],
        [ 0.2872, -0.1707],
        [ 0.2998, -0.2625],
        [-0.1355,  0.1384],
        [ 0.3690, -0.3363],
        [-0.1900,  0.2115],
        [ 0.3917, -0.3331],
        [-0.1276,  0.1587],
        [ 0.0599, -0.0139],
        [ 0.4453, -0.4289],
        [ 0.2063, -0.2003],
        [ 0.0933, -0.0609],
        [ 0.0417,  0.0189],
        [-0.1662,  0.1488],
        [ 0.2551, -0.1291],
        [-0.1454,  0.1812],
        [-0.0321,  0.0892],
        [ 0.0550, -0.0092],
        [-0.1948,  0.2041],
        [ 0.1281, -0.0914],
        [-0.1677,  0.1636],
        [ 0.4241, -0.3984],
        [-0.0416,  0.0728],
        [-0.1006,  0.0943],
        [ 0.3720, -0.3432],
        [ 0.1092, -0.0567]], device='cuda:0')
pred: [0 0 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 1 0 1 1 0 1 0 1 0 1 1 0 0]
real: 

100%|██████████| 52/52 [02:50<00:00,  3.28s/it]

logits:
tensor([[-0.1545,  0.1706],
        [-0.0513,  0.1023],
        [ 0.1295, -0.0586],
        [ 0.1343, -0.1166],
        [ 0.1440, -0.1116],
        [-0.0125,  0.0538],
        [-0.1056,  0.1462],
        [ 0.3765, -0.2729],
        [ 0.0297, -0.0084],
        [-0.0469,  0.0669],
        [ 0.3702, -0.3176],
        [ 0.1084, -0.0487],
        [-0.2232,  0.2193],
        [ 0.2228, -0.1386],
        [ 0.2532, -0.1463]], device='cuda:0')
pred: [1 1 0 0 0 1 1 0 0 1 0 0 1 0 0]
real: [0 0 0 0 0 0 0 0 0 1 0 0 1 0 0]

	test loss: nan

	test acc: 0.669150641025641

	test prec: 0.8620109441808648

	test rec: 0.669150641025641

	test f1: 0.726120170499398



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [22]:
print("pred_flat:", "\n", pred_flat)
print("\n")
print("logits", "\n", logits)

pred_flat: 
 [1 1 0 0 0 1 1 0 0 1 0 0 1 0 0]


logits 
 [[-0.15452594  0.17062987]
 [-0.05125122  0.1022709 ]
 [ 0.12950607 -0.05856593]
 [ 0.13427016 -0.11655997]
 [ 0.14400426 -0.11163004]
 [-0.01252272  0.05384506]
 [-0.10559119  0.14616893]
 [ 0.37654907 -0.27287242]
 [ 0.02965242 -0.00835072]
 [-0.04686457  0.06688274]
 [ 0.37016228 -0.31758758]
 [ 0.10836572 -0.04874229]
 [-0.22318672  0.21926735]
 [ 0.22281946 -0.13857664]
 [ 0.25320628 -0.14630987]]


### printig some variabels to look into 

In [None]:
train_loss


In [None]:
train_acc 

In [None]:
train_prec

In [None]:

train_rec

In [None]:

train_f1

In [None]:
val_accuracy

In [None]:
val_loss

In [None]:
val_acc

In [None]:
val_prec

In [None]:
val_rec

In [None]:
val_f1

In [None]:
test_accuracy

In [None]:
test_loss

In [None]:
test_acc

In [None]:
test_prec

In [None]:
test_rec

In [None]:
test_f1