## A model build using TweetBERT to classify tweet as causal or non-causal 

In [3]:
import pandas as pd
import numpy as np
import spacy 
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification, AutoTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import random
import os
import torch.nn.functional as F
import torch
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
import transformers
from tqdm import tqdm, trange
#from google.colab import drive, files
import io

In [4]:
#uploaded = files.upload()
#data = pd.read_excel(io.BytesIO(uploaded['Causality + hypoglycemia.xlsx']), sheet_name=">5000_samples_")


# data = pd.read_excel("/home/adrian/Downloads/Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")
data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")

  data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")


In [5]:
print("Total count:", data.shape[0])
data = data[data["Causal association"].notnull()]
print("Labeled count:", data.shape[0])

data.head()

Total count: 5456
Labeled count: 5000


Unnamed: 0,id,text,full_text,Intent,Cause,Effect,Causal association,Charline association0=no;1=yes,Remarks
0,908171203029868545,"tonight , I learned my older girl will back he...","tonight , I learned my older girl will back he...",,,,0.0,,
1,1203645589214367745,USER USER I knew diabetes and fibromyalgia wer...,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0.0,,
2,1310596731063525376,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,mS,,,0.0,,
3,1125198453167022085,USER Cheers ! Have one for this diabetic too !,USER Cheers ! Have one for this diabetic too !,mS,,,0.0,,
4,1248600944138268673,USER Additionally the medicines are being char...,USER Additionally the medicines are being char...,,medicines are being charged at MRP,costing much higher,1.0,,


### Interrater-reliabilty measure

In [6]:
from sklearn.metrics import cohen_kappa_score

charline = data[data["Charline association0=no;1=yes"].notnull()]
coder1 = charline["Causal association"].values
coder2 = charline["Charline association0=no;1=yes"]
score = cohen_kappa_score(coder1,coder2)
print('Cohen\'s Kappa:',score)

Cohen's Kappa: 0.823719518166683


### Preprocessing

In [7]:
data["Causal association"].value_counts()

0.0    3720
1.0    1280
Name: Causal association, dtype: int64

In [8]:
# Normalisation for BertTweet
from nltk.tokenize import TweetTokenizer
from emoji import demojize
import re

tokenizer = TweetTokenizer()

# https://huggingface.co/vinai/bertweet-base
def normalizeToken(token):
    lowercased_token = token.lower()
    if token.startswith("@"):
        return "@USER"
    elif lowercased_token.startswith("http") or lowercased_token.startswith("www"):
        return "HTTPURL"
    elif len(token) == 1:
        return demojize(token)
    else:
        if token == "’":
            return "'"
        elif token == "…":
            return "..."
        else:
            return token

def normalizeTweet(tweet):

    tokens = tokenizer.tokenize(tweet.replace("’", "'").replace("…", "..."))
    normTweet = " ".join([normalizeToken(token) for token in tokens])

    normTweet = normTweet.replace("cannot ", "can not ").replace("n't ", " n't ").replace("n 't ", " n't ").replace("ca n't", "can't").replace("ai n't", "ain't")
    normTweet = normTweet.replace("'m ", " 'm ").replace("'re ", " 're ").replace("'s ", " 's ").replace("'ll ", " 'll ").replace("'d ", " 'd ").replace("'ve ", " 've ")
    normTweet = normTweet.replace(" p . m .", "  p.m.") .replace(" p . m ", " p.m ").replace(" a . m .", " a.m.").replace(" a . m ", " a.m ")

    normTweet = re.sub(r",([0-9]{2,4}) , ([0-9]{2,4})", r",\1,\2", normTweet)
    normTweet = re.sub(r"([0-9]{1,3}) / ([0-9]{2,4})", r"\1/\2", normTweet)
    normTweet = re.sub(r"([0-9]{1,3})- ([0-9]{2,4})", r"\1-\2", normTweet)
    
    return " ".join(normTweet.split())

In [9]:
def split_into_sentences(text):
    text = " " + text + "  "
    text = text.replace("\n"," ")
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    sentences = text.split("<stop>")
    #sentences = sentences[:-1]
    sentences = [s.strip() for s in sentences]
    sentences = [s  for s in sentences if s != ""]
    return sentences


def create_training_data(data):
    tweets = []
    causal_labels = []
    
    for i, row in data.iterrows():
#        print("\n")
#        print(row["full_text"])
#        print(row["Intent"], "->", row["Causal association"])
       
        sentences = split_into_sentences(row["full_text"])
        intents = set(str(row["Intent"]).strip().split(";"))
        causes = str(row["Cause"]).strip().split(";")
        effects = str(row["Effect"]).strip().split(";")
#        print("\tintents:", intents)
#        print("\tcauses: '{}'".format(causes))
#        print("\teffects: '{}'".format(effects))
        
        if set({"nan"}) == intents or set({" "}) == intents: # single sentence
#            print("\tA: add => causality: {}".format(row["Causal association"]))
            tweets.append(row["full_text"])
            causal_labels.append(row["Causal association"])

        elif (
             set({"q"}) == intents 
          or set({"joke"}) == intents 
          or set({"q", "joke"}) == intents
          or set({"joke", "mS"}) == intents 
          or set({"neg"}) == intents 
          or set({"neg", "msS"}) == intents
          or set({"neg", "mS"}) == intents
          or set({"neg", "msS", "mE"}) == intents
          or set({"q", "joke", "mS"}) == intents
          or set({"q", "msS", "neg"}) == intents
          or set({"neg", "mC"}) == intents
          or set({"mC", "joke", "msS"}) == intents
          or set({"joke", "mE"}) == intents
        ):
#            print("\tB ignore")
            continue
        elif (  
             set({"mS"}) == intents # multiple sentences (possible that cause and effect in different sentences -> ignore)
          or set({"q", "mS"}) == intents # multiple sentences or question
          or set({"mS", "mE"}) == intents
          or set({"mC", "mS"}) == intents
          or set({"mC", "mS", "mE"}) == intents
          or set({"q", "mC", "mS"}) == intents
          or set({"q", "mC", "mS", "mE"}) == intents
            
        ):
            for sent in sentences:
#                print(sent)
                if sent[-1] != "?": # ignore questions
#                    print("\tC: add, causality => 0")
                    tweets.append(sent)
                    causal_labels.append(0)  
#                else:
#                    print("\tD: ignore")
        elif (
            set({"msS"}) == intents # multiple sentences with cause and effect in single sentence
         or set({"q", "msS"}) == intents # msS and a question
         or set({"msS", "mE"}) == intents # msS with several effects
         or set({"mC", "msS"}) == intents
         or set({"mE"}) == intents # multiple effects
         or set({"mC"}) == intents # multiple causes
         or set({"mC", "msS", "mE"}) == intents
         or set({"mC", "mE"}) == intents
         or set({"q", "mC", "mE"}) == intents   
         or set({"q", "mC", "msS"}) == intents
        ):

            if row["Causal association"] != 1: #TEST
                print(sentences)
                print("1) ERROR: Causal association should be 1 !!!!")      
                print()
        
            for sent in sentences:
#                print("sub sent:", sent)
                if sent[-1] != "?": # ignore question
                    
                    existCause = False
                    for cause in causes:
                        if cause in sent:
#                            print("Cause: <{}> in sent".format(cause))
                            existCause = True
                    
                    existEffect = False
                    for effect in effects:
                        if effect in sent:
#                            print("Effect: <{}> in sent".format(effect))
                            existEffect = True
                            
                    if existCause and existEffect:
                        tweets.append(sent)
                        causal_labels.append(row["Causal association"])
#                        print("E: add with Cause + effect => association: {}".format(row["Causal association"]))
                    else:
                        tweets.append(sent)
                        causal_labels.append(0)
#                        print("F: not both C + E in sentence, association: 0")
#                else:
#                    print("H: question in sentence")
            if row["Causal association"] == 0:
                print(sentences)
                print("H: should not have causality == 0")
#        else:
#            print("END: should not happen")

    
    return pd.DataFrame({"tweet" : tweets, "Causal association" : causal_labels})

trainingData = create_training_data(data)
print("N sentences:", trainingData.shape)
trainingData = trainingData[trainingData["tweet"].str.split(" ").str.len() > 3] # keep only sentence with more than 3 tokens
print("N sentences with > 3 words:", trainingData.shape)

N sentences: (9779, 2)
N sentences with > 3 words: (8235, 2)


In [10]:
# let's print a few example of our cleaned and normalized traingin dataset
trainingData.head()

Unnamed: 0,tweet,Causal association
0,"tonight , I learned my older girl will back he...",0.0
1,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,0.0
2,I'm a trans woman .,0.0
3,"Both of us could use a world where "" brave and...",0.0
4,"Make a world where people can just be , withou...",0.0


In [11]:
trainingData["Causal association"].value_counts()

0.0    7218
1.0    1017
Name: Causal association, dtype: int64

### Training

In [12]:
# text = trainingData["tweet"].map(normalizeTweet).values.tolist()
# labels = trainingData["Causal association"].values.tolist()
# # first split the data into traingin and testing label in the ratio of 80:20
# train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2)
# # split the new training data (80% of actual data) to get train and validation set
# train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2)



# print("Train: {}".format(len(train_texts)))
# print("Val: {}".format(len(val_texts)))
# print("Test: {}".format(len(test_texts)))


In [13]:
## when not straitifyign the data: 

text = trainingData["tweet"].map(normalizeTweet).values.tolist()
labels = trainingData["Causal association"].values.tolist()
# first split the data into training and testing label in the ratio of 80:20
train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2)
# split the new training data (80% of actual data) to get train and validation set
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2)

data_count_info = pd.Series(labels).value_counts(normalize=True)
train_count_info = pd.Series(train_labels).value_counts(normalize=True)
val_count_info = pd.Series(val_labels).value_counts(normalize=True)
test_count_info = pd.Series(test_labels).value_counts(normalize=True)

# for class-imbalanced dataset, the class weight for a ith class
# to be specified for balancing in the loss function is given by:
# weight[i] = num_samples / (num_classes * num_samples[i])
# since train_count_info obtained above has fraction of samples
# for ith class, hence the corresponding weight calculation is:
class_weight = (1/train_count_info)/len(train_count_info)

print("All: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(labels), *data_count_info.round(4).to_list()))
print("Train: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(train_labels), *train_count_info.round(4).to_list()))
print("Val: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(val_labels), *val_count_info.round(4).to_list()))
print("Test: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(test_labels), *test_count_info.round(4).to_list()))
print("Balancing class wts: for 0 = {}, for 1 = {}".format(
    *class_weight.round(4).to_list()))

All: Count = 8235, % of 0 = 0.8765, % of 1 = 0.1235
Train: Count = 5270, % of 0 = 0.8787, % of 1 = 0.1213
Val: Count = 1318, % of 0 = 0.8725, % of 1 = 0.1275
Test: Count = 1647, % of 0 = 0.8725, % of 1 = 0.1275
Balancing class wts: for 0 = 0.569, for 1 = 4.1236


In [14]:
####################### Stratified splits ####################

text = trainingData["tweet"].map(normalizeTweet).values.tolist()
labels = trainingData["Causal association"].values.tolist()
# first split the data into training and testing label in the ratio of 80:20
train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=0.2, stratify=labels)
# split the new training data (80% of actual data) to get train and validation set
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2, stratify=train_labels)



data_count_info = pd.Series(labels).value_counts(normalize=True)
train_count_info = pd.Series(train_labels).value_counts(normalize=True)
val_count_info = pd.Series(val_labels).value_counts(normalize=True)
test_count_info = pd.Series(test_labels).value_counts(normalize=True)

# for class-imbalanced dataset, the class weight for a ith class
# to be specified for balancing in the loss function is given by:
# weight[i] = num_samples / (num_classes * num_samples[i])
# since train_count_info obtained above has fraction of samples
# for ith class, hence the corresponding weight calculation is:
class_weight = (1/train_count_info)/len(train_count_info)

print("All: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(labels), *data_count_info.round(4).to_list()))
print("Train: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(train_labels), *train_count_info.round(4).to_list()))
print("Val: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(val_labels), *val_count_info.round(4).to_list()))
print("Test: Count = {}, % of 0 = {}, % of 1 = {}".format(
    len(test_labels), *test_count_info.round(4).to_list()))
print("Balancing class wts: for 0 = {}, for 1 = {}".format(
    *class_weight.round(4).to_list()))

All: Count = 8235, % of 0 = 0.8765, % of 1 = 0.1235
Train: Count = 5270, % of 0 = 0.8765, % of 1 = 0.1235
Val: Count = 1318, % of 0 = 0.8763, % of 1 = 0.1237
Test: Count = 1647, % of 0 = 0.8767, % of 1 = 0.1233
Balancing class wts: for 0 = 0.5705, for 1 = 4.0476


In [15]:
# train_texts[1]

In [16]:
# Transform labels + encodings into Pytorch DataSet object (including __len__, __getitem__)
class TweetDataSet(torch.utils.data.Dataset):
    def __init__(self, text, labels, tokenizer):
        self.text = text
        self.labels = labels
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        inputs = self.tokenizer(self.text, padding=True, truncation=True, return_token_type_ids=True)
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]
        return {
                "input_ids" : torch.tensor(ids[idx], dtype=torch.long)
              , "attention_mask" : torch.tensor(mask[idx], dtype=torch.long)
              , "token_type_ids" : torch.tensor(token_type_ids[idx], dtype=torch.long)
              , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
        }      

    def __len__(self):
        return len(self.labels)

    
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")

train_dataset = TweetDataSet(train_texts, train_labels, tokenizer)
val_dataset = TweetDataSet(val_texts, val_labels, tokenizer)
test_dataset = TweetDataSet(test_texts, test_labels, tokenizer)
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))

# put data to batches (iterables)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


5270
1318
1647


In [17]:
# !jupyter nbextension enable --py widgetsnbextension

In [16]:
# # from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
# # from transformers import AutoModelForSequenceClassification
# from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef

# def compute_metrics(pred, labels):
#     #labels = pred.label_ids
#     #preds = pred.predictions.argmax(-1)
#     precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='binary')
#     acc = accuracy_score(labels, pred)
#     return {
#         'accuracy': acc,
#         'f1': f1,
#         'precision': precision,
#         'recall': recall
#     }



# class CausalityBERT(torch.nn.Module):
#     """ Model Bert"""
#     def __init__(self):
#         super(CausalityBERT, self).__init__()
#         self.num_labels = 2
#         self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
#         self.dropout = torch.nn.Dropout(0.3)
#         self.linear = torch.nn.Linear(768, self.num_labels)
#         # softmax layer missing ? -> Vivek
        
#     def forward(self, input_ids, attention_mask, token_type_ids):
#         _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False)
#         output_2 = self.dropout(output_1)
#         logits = self.linear(output_2)
#         return logits


# ## Model parameters
# batchsize_train = 16
# lr = 5e-5
# adam_eps = 1e-8
# epochs = 3 
# num_warmup_steps = 0
# num_training_steps = len(train_loader)*epochs

# # Store our loss and accuracy for plotting
# train_loss_set = []
# learning_rate = []


# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# #model = AutoModelForSequenceClassification.from_pretrained("vinai/bertweet-base")
# model = CausalityBERT()
# model.to(device)

# # fine-tune only the task-specific parameters -> Vivek? 
# for param in model.bert.parameters():
#     param.requires_grad = False
    
# model.to(device)
# model.train() # set model to training mode


# optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
# scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) # scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0; after a warmup period durnig which it increases linearly from 0 to the initial lr set in the optimizer
# loss_fn = CrossEntropyLoss()

# for epoch in trange(1, epochs+1, desc='Epoch'):
#     print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")
    
#     #calculate total loss for this epoch
#     batch_loss = 0
    
#     for batch in tqdm(train_loader):
#         #print("batch:", batch)
#         optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
#         input_ids = batch['input_ids'].to(device)
#         attention_mask = batch['attention_mask'].to(device)
#         token_type_ids = batch["token_type_ids"].to(device)
#         labels = batch['labels'].to(device)
#         #logits = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # forward pass
#         logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        
        
        
        
#         loss = loss_fn(logits, labels)
#         print("loss:", loss)
#         #loss = outputs[0]
#         loss.backward() # backward pass
#         optim.step()    # update parameters and take a step up using the computed gradient
#         scheduler.step()# update learning rate scheduler
#         batch_loss += loss.item() # update tracking variables
        
#     avg_train_loss = batch_loss / len(train_loader) # calculate avg loss over training data

#     # store the current learning rate
#     for param_group in optim.param_groups:
#         print("\n\tCurrent Learning rate: ", param_group['lr'])
#         learning_rate.append(param_group['lr'])
    
#     train_loss_set.append(avg_train_loss)
#     print(F'\n\tAverage Training loss: {avg_train_loss}')

    
#     ## ---- Validation ------
#     model.eval() # put model in evaluation mode for validation set
    
#     eval_accuracy, eval_mcc_accuracy, nb_eval_steps = 0, 0, 0 # Tracking variables
#     val_accuracy = []
#     val_loss = []
#     val_acc = []
#     val_prec = []
#     val_rec = []
#     val_f1 = []
    
    
#     # Evaluate data for one epoch
#     for batch in tqdm(validation_loader):
#         batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
#         b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
        
#         with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
#           #  logits = model(b_inputs_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids) # forward pass, calculates logit predictions
#           logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions
            
#         loss = loss_fn(logits, b_labels)
#         val_loss.append(loss.item())
        
#         # move logits and labels to CPU
#         logits = logits.detach().to('cpu').numpy()
#         label_ids = b_labels.to('cpu').numpy()
        
#         pred_flat = np.argmax(logits, axis=1).flatten() # convert logits to list of predicted labels
#         labels_flat = label_ids.flatten()
#         eval_accuracy += accuracy_score(labels_flat, pred_flat)
#         eval_mcc_accrucay += matthews_corrcoef(labels_flat, pred_flat)        
#         metrics = compute_metrics(pred_flat, labels_flat)
#         val_acc.append(metrics["accuracy"])
#         val_prec.append(metrics["precision"])
#         val_rec.append(metrics["recall"])
#         val_f1.append(metrics["f1"])
#         nb_eval_steps += 1
        
#     print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation MCC Accuracy: {eval_mcc_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation loss: {np.mean(val_loss)}')
#     print(F'\n\tValidation acc: {np.mean(val_acc)}')
#     print(F'\n\tValidation prec: {np.mean(val_prec)}')
#     print(F'\n\tValidation rec: {np.mean(val_rec)}')
#     print(F'\n\tValidation f1: {np.mean(val_f1)}')

    


In [18]:
# from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
# from transformers import AutoModelForSequenceClassification

## we are measuring weighted metrics - as our dataset is unbalanced 
# Calculate metrics for each label, and find their average weighted by support
#(the number of true instances for each label). 
#This alters ‘macro’ to account for label imbalance; 
# it can result in an F-score that is not between precision and recall.


from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef

def compute_metrics(pred, labels):
    #labels = pred.label_ids
    #preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='weighted')
    acc = accuracy_score(labels, pred)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }



class CausalityBERT(torch.nn.Module):
    """ Model Bert"""
    def __init__(self):
        super(CausalityBERT, self).__init__()
        self.num_labels = 2
        self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
        self.dropout = torch.nn.Dropout(0.3)
        self.linear1 = torch.nn.Linear(768, 256)
        self.linear2 = torch.nn.Linear(256, self.num_labels)
        self.softmax = torch.nn.Softmax(-1)
        
    def forward(self, input_ids, attention_mask, token_type_ids):
        _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False) # if output 1 is our cls token        
        output_2 = self.dropout(output_1)
        output_3 = self.linear1(output_2)  
        output_4 = self.dropout(output_3)
        output_5 = self.linear2(output_4)
        # cross entory will take care of the logits - we don't need if we are usign cross entropy for loss function 
        # if doing yourself - use nll loss and logSoftmax 
#         logit = self.softmax(output_5)
        return output_5



    


## Model parameters


In [19]:
batchsize_train = 16
lr = 1e-3
adam_eps = 1e-8
epochs = 35
num_warmup_steps = 0
num_training_steps = len(train_loader)*epochs

In [20]:


# Store our loss and accuracy for plotting : where is accuracy : it is loss an dlearning rate 
train_loss_set = []
learning_rate = []


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = CausalityBERT()
model.to(device)

# fine-tune only the task-specific parameters -> Vivek? 
## can we just update last layer of BERT 
for param in model.bert.parameters():
    param.requires_grad = False


optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) # scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0; after a warmup period durnig which it increases linearly from 0 to the initial lr set in the optimizer
# loss_fn = CrossEntropyLoss()

## penalising more for class with less number of exaplmes 
loss_fn = CrossEntropyLoss(torch.tensor(class_weight.to_list()).to(device))


You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.6.attention.self.key.bias', 'roberta.encoder.layer.9.intermediate.dense.weight', 'roberta.encoder.layer.8.attention.self.value.bias', 'roberta.encoder.layer.5.attention.self.query.bias', 'roberta.encoder.layer.9.attention.output.dense.bias', 'roberta.encoder.layer.5.attention.self.value.weight', 'roberta.encoder.layer.7.attention.output.LayerNorm.weight', 'roberta.encoder.layer.6.attention.output.LayerNorm.bias', 'roberta.encoder.layer.10.attention.self.value.weight', 'roberta.encoder.layer.9.attention.self.key.bias', 'roberta.encoder.layer.6.intermediate.dense.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'lm_head.dense.bias', 'roberta.encoder.layer.4.intermediate.dense.weight', 'roberta.e

In [20]:



for epoch in trange(1, epochs+1, desc='Epoch'):
    print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")
    
    #calculate total loss for this epoch
#     epoch_loss = 0
    
    ########### training eval metrics #############################
    tr_accuracy, tr_mcc_accuracy, nb_tr_steps = 0, 0, 0 # Tracking variables
    train_accuracy = []
    train_loss = []
    train_acc = []
    train_prec = []
    train_rec = []
    train_f1 = []
    
    ###################################################
    
    for batch in tqdm(train_loader):
        #print("batch:", batch)
        optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch["token_type_ids"].to(device)
        labels = batch['labels'].to(device)
        #logits = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # forward pass
#         outputs = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        ###########################################################################
        model.train()
        ## this is output_5 based on our model defined above 
        logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass
        #############################################################################
        loss = loss_fn(logits, labels)
        print("loss:", loss)
        #loss = outputs[0]
        loss.backward() # backward pass
        optim.step()    # update parameters and take a step up using the computed gradient
        scheduler.step()# update learning rate scheduler
#         epoch_loss += loss.item() # update tracking variables
        train_loss.append(loss.item())
        
    
    ############# Training Accuracy Measure ##########################################
#         loss = loss_fn(logits, b_labels)
#         val_loss.append(loss.item())

        # move logits and labels to CPU
        logits = logits.detach().to('cpu').numpy()
        label_ids = labels.to('cpu').numpy()

        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()

# #         tr_accuracy += accuracy_score(labels_flat, pred_flat) # this is same as metric accuracy 
#         tr_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  


        metrics = compute_metrics(pred_flat, labels_flat)
#         print(metrics)
        
        
#         train_loss.append(loss)
        train_acc.append(metrics["accuracy"])
        train_prec.append(metrics["precision"])
        train_rec.append(metrics["recall"])
        train_f1.append(metrics["f1"])
        
#         batch_loss
#         nb_tr_steps += 1
        
#     batch_loss = np.mean(train_loss)
#     train_loss_set.append(batch_loss)
        
    
#     print(F'\n\tTraining Accuracy: {tr_accuracy / nb_tr_steps}') # accuracy is calculated twice 
#     print(F'\n\tTraining MCC Accuracy: {tr_mcc_accuracy / nb_tr_steps}') # eval M
    
#     train_loss_set.append(batch_loss) # it should be either loss or batch_loss
    
    
#     train_loss = train_loss.to('cpu').numpy()
    print(F'\n\tTrain loss: {np.mean(train_loss)}')
    print(F'\n\ttrain acc: {np.mean(train_acc)}')
    print(F'\n\ttraining prec: {np.mean(train_prec)}')
    print(F'\n\ttraining rec: {np.mean(train_rec)}')
    print(F'\n\ttraining f1: {np.mean(train_f1)}')
    
    
    ###################################################################################
    #     avg_train_loss = batch_loss / len(train_loader) # calculate avg loss over training data : we don't need it as you did it for each sample
    #     train_loss_set.append(avg_train_loss)
    #     print(F'\n\tAverage Training loss: {avg_train_loss}')
    
#     train_loss_set.append(train_loss)
#     print(F'\n\tAverage Training loss per epoch: {train_loss}')


    # store the current learning rate
    for param_group in optim.param_groups:
        print("\n\tCurrent Learning rate: ", param_group['lr'])
        learning_rate.append(param_group['lr'])
    


    
    ## ---- Validation ------
#     model.eval() # put model in evaluation mode for validation set
    
    eval_accuracy, eval_mcc_accuracy, nb_eval_steps = 0, 0, 0 # Tracking variables
    val_accuracy = []
    val_loss = []
    val_acc = []
    val_prec = []
    val_rec = []
    val_f1 = []
    
    
    # Evaluate data for one epoch
    for batch in tqdm(validation_loader):
        batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
        b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
        
        with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
          #  logits = model(b_inputs_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids) # forward pass, calculates logit predictions
#             outputs = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
            ##################################################################################
            model.eval()
            logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 

                        
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())
        
        # move logits and labels to CPU
        logits = logits.detach().to('cpu').numpy()
        label_ids = b_labels.to('cpu').numpy()
        
        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()
        
#         eval_accuracy += accuracy_score(labels_flat, pred_flat)
#         eval_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  
        
        
        metrics = compute_metrics(pred_flat, labels_flat)
        val_acc.append(metrics["accuracy"])
        val_prec.append(metrics["precision"])
        val_rec.append(metrics["recall"])
        val_f1.append(metrics["f1"])
#         nb_eval_steps += 1
        
#     print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
#     print(F'\n\tValidation MCC Accuracy: {eval_mcc_accuracy / nb_eval_steps}') # eval M
    
#     val_loss = val_loss.to('cpu').numpy()
    print(F'\n\tValidation loss: {np.mean(val_loss)}')
    print(F'\n\tValidation acc: {np.mean(val_acc)}')
    print(F'\n\tValidation prec: {np.mean(val_prec)}')
    print(F'\n\tValidation rec: {np.mean(val_rec)}')
    print(F'\n\tValidation f1: {np.mean(val_f1)}')


Epoch:   0%|          | 0/35 [00:00<?, ?it/s]
  0%|          | 0/83 [00:00<?, ?it/s][A



  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)


loss: 


  1%|          | 1/83 [00:24<32:57, 24.11s/it][A

tensor(0.7208, device='cuda:0', grad_fn=<NllLossBackward>)


  _warn_prf(average, modifier, msg_start, len(result))

  2%|▏         | 2/83 [00:45<30:30, 22.60s/it][A

loss: tensor(0.9789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:07<29:24, 22.06s/it][A

tensor(0.7318, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:28<28:43, 21.81s/it][A

tensor(0.8396, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:50<28:12, 21.70s/it][A

tensor(0.7964, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 6/83 [02:11<27:56, 21.77s/it][A

loss: tensor(0.7083, device='cuda:0', grad_fn=<NllLossBackward>)



  8%|▊         | 7/83 [02:34<27:47, 21.94s/it][A

loss: tensor(0.7408, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:55<27:19, 21.86s/it][A

tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:17<26:55, 21.83s/it][A

tensor(0.7848, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6653, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:39<26:31, 21.79s/it][A

loss: 


 13%|█▎        | 11/83 [04:01<26:10, 21.81s/it][A

tensor(0.7073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6942, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:23<25:49, 21.82s/it][A

loss: 


 16%|█▌        | 13/83 [04:44<25:28, 21.83s/it][A

tensor(0.7490, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7160, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [05:06<25:03, 21.79s/it][A


loss: tensor(0.8001, device='cuda:0', grad_fn=<NllLossBackward>)


 18%|█▊        | 15/83 [05:28<24:39, 21.75s/it][A

loss: 


 19%|█▉        | 16/83 [05:49<24:15, 21.72s/it][A

tensor(0.6695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:11<23:51, 21.68s/it][A

tensor(0.6159, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:33<23:27, 21.66s/it][A

loss: tensor(0.6258, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:54<23:05, 21.64s/it][A

tensor(0.6530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:16<22:43, 21.64s/it][A

tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.7294, device='cuda:0', grad_fn=<NllLossBackward>)


 25%|██▌       | 21/83 [07:37<22:19, 21.60s/it][A
 27%|██▋       | 22/83 [07:59<21:55, 21.56s/it][A

loss: tensor(0.6647, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [08:20<21:31, 21.53s/it][A

loss: tensor(0.6273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:42<21:11, 21.55s/it][A

tensor(0.6185, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [09:03<20:50, 21.56s/it][A

loss: tensor(0.5855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:25<20:28, 21.56s/it][A

tensor(0.5863, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:47<20:06, 21.55s/it][A

tensor(0.6163, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:08<19:46, 21.57s/it][A

tensor(0.7106, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:29<19:20, 21.49s/it][A

tensor(0.5927, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:51<18:55, 21.42s/it][A

tensor(0.6841, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:12<18:30, 21.35s/it][A

tensor(0.6428, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:33<18:06, 21.30s/it][A

loss: tensor(0.6003, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:54<17:43, 21.27s/it][A

tensor(0.7730, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:16<17:22, 21.27s/it][A

tensor(0.7833, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:37<17:00, 21.25s/it][A

tensor(0.6836, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5960, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:58<16:38, 21.24s/it][A

loss: 


 45%|████▍     | 37/83 [13:19<16:16, 21.22s/it][A

tensor(0.6752, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:40<15:53, 21.19s/it][A

loss: tensor(0.7685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:01<15:32, 21.20s/it][A

tensor(0.5220, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [14:23<15:11, 21.20s/it][A

loss: tensor(0.6811, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:44<14:51, 21.22s/it][A

loss: tensor(0.6133, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [15:05<14:30, 21.22s/it][A

loss: tensor(0.6518, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [15:26<14:08, 21.21s/it][A

loss: tensor(0.6891, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:48<13:47, 21.23s/it][A

tensor(0.6743, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:09<13:26, 21.23s/it][A

tensor(0.6087, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [16:30<13:06, 21.25s/it][A

loss: tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:51<12:45, 21.27s/it][A

tensor(0.7395, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [17:13<12:25, 21.31s/it][A

loss: tensor(0.7489, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6106, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:34<12:06, 21.37s/it][A
 60%|██████    | 50/83 [17:56<11:45, 21.38s/it][A

loss: tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [18:17<11:24, 21.38s/it][A

loss: tensor(0.5763, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:39<11:02, 21.39s/it][A

loss: tensor(0.6487, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [19:00<10:41, 21.38s/it][A

loss: tensor(0.6126, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [19:21<10:19, 21.37s/it][A

loss: tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:42<09:56, 21.31s/it][A

tensor(0.4684, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [20:04<09:33, 21.23s/it][A

loss: tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:25<09:12, 21.24s/it][A

tensor(0.6304, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:46<08:50, 21.22s/it][A

loss: tensor(0.6771, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [21:07<08:29, 21.22s/it][A

loss: tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [21:29<08:09, 21.30s/it][A

tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:50<07:49, 21.36s/it][A

tensor(0.6038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:12<07:29, 21.39s/it][A

tensor(0.4947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:33<07:07, 21.39s/it][A

tensor(0.5702, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:54<06:46, 21.38s/it][A

loss: tensor(0.6454, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [23:16<06:24, 21.34s/it][A

loss: tensor(0.5055, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [23:37<06:03, 21.36s/it][A

loss: tensor(0.7673, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:59<05:42, 21.43s/it][A

loss: tensor(0.7351, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [24:20<05:21, 21.45s/it][A

loss: tensor(0.5889, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [24:41<04:59, 21.41s/it][A

loss: tensor(0.6657, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [25:03<04:38, 21.39s/it][A

loss: tensor(0.6806, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:24<04:17, 21.45s/it][A

tensor(0.5647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:46<03:55, 21.40s/it][A

tensor(0.6796, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:07<03:33, 21.38s/it][A

tensor(0.5474, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:28<03:12, 21.38s/it][A

tensor(0.6719, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:50<02:50, 21.35s/it][A

tensor(0.7188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:11<02:29, 21.34s/it][A

tensor(0.5844, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:32<02:08, 21.37s/it][A

tensor(0.6182, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:54<01:47, 21.44s/it][A

tensor(0.7125, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [28:16<01:25, 21.50s/it][A

loss: tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [28:37<01:04, 21.60s/it][A

loss: tensor(0.5522, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:59<00:43, 21.61s/it][A

tensor(0.7065, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:21<00:21, 21.63s/it][A

tensor(0.6346, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:28<00:00, 21.31s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5947, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6627969020102398

	train acc: 0.6191983844468784

	training prec: 0.8244645994237823

	training rec: 0.6191983844468784

	training f1: 0.6549377275359946

	Current Learning rate:  0.0009714285714285714



  2%|▏         | 1/42 [00:02<01:56,  2.84s/it][A
  5%|▍         | 2/42 [00:05<01:53,  2.85s/it][A
  7%|▋         | 3/42 [00:08<01:50,  2.83s/it][A
 10%|▉         | 4/42 [00:11<01:47,  2.83s/it][A
 12%|█▏        | 5/42 [00:14<01:45,  2.84s/it][A
 14%|█▍        | 6/42 [00:17<01:41,  2.83s/it][A
 17%|█▋        | 7/42 [00:19<01:39,  2.84s/it][A
 19%|█▉        | 8/42 [00:22<01:37,  2.86s/it][A
 21%|██▏       | 9/42 [00:25<01:34,  2.86s/it][A
 24%|██▍       | 10/42 [00:28<01:31,  2.85s/it][A
 26%|██▌       | 11/42 [00:31<01:28,  2.85s/it][A
 29%|██▊       | 12/42 [00:34<01:25,  2.85s/it][A
 31%|███       | 13/42 [00:36<01:21,  2.83s/it][A
 33%|███▎      | 14/42 [00:39<01:18,  2.81s/it][A
 36%|███▌      | 15/42 [00:42<01:15,  2.80s/it][A
 38%|███▊      | 16/42 [00:45<01:12,  2.78s/it][A
 40%|████      | 17/42 [00:47<01:09,  2.77s/it][A
 43%|████▎     | 18/42 [00:50<01:06,  2.77s/it][A
 45%|████▌     | 19/42 [00:53<01:03,  2.77s/it][A
 48%|████▊     | 20/42 [00:56<01:01,  2


	Validation loss: 0.6573287809178943

	Validation acc: 0.45907738095238093

	Validation prec: 0.8749152831069732

	Validation rec: 0.45907738095238093

	Validation f1: 0.5268295552578619
loss: 


  1%|          | 1/83 [00:21<29:53, 21.88s/it][A

tensor(0.6070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:33, 21.89s/it][A

tensor(0.7105, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:05<29:17, 21.97s/it][A

tensor(0.6869, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:27<28:52, 21.93s/it][A

tensor(0.6016, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:49<28:28, 21.91s/it][A

tensor(0.6155, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:11<28:03, 21.86s/it][A

tensor(0.6817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:33<27:42, 21.87s/it][A

tensor(0.5459, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:55<27:24, 21.92s/it][A

loss: tensor(0.4722, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:17<27:04, 21.96s/it][A

loss: tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:39<26:41, 21.94s/it][A

loss: tensor(0.5096, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:01<26:21, 21.96s/it][A

tensor(0.5893, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.4972, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:23<25:58, 21.95s/it][A
 16%|█▌        | 13/83 [04:44<25:33, 21.91s/it][A

loss: tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [05:06<25:10, 21.90s/it][A

loss: tensor(0.6878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:28<24:45, 21.84s/it][A

tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:50<24:18, 21.77s/it][A

tensor(0.6943, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:11<23:50, 21.68s/it][A

tensor(0.6739, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:33<23:25, 21.63s/it]

loss: tensor(0.5342, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 23%|██▎       | 19/83 [06:54<22:59, 21.56s/it][A

tensor(0.6772, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:15<22:35, 21.51s/it][A

tensor(0.6949, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:37<22:13, 21.51s/it][A

tensor(0.7910, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:58<21:51, 21.49s/it][A

tensor(0.5838, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [08:20<21:23, 21.39s/it][A

loss: tensor(0.6680, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:41<20:56, 21.30s/it][A

tensor(0.5785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [09:02<20:32, 21.26s/it][A

tensor(0.6389, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.7106, device='cuda:0', grad_fn=<NllLossBackward>)


 31%|███▏      | 26/83 [09:23<20:08, 21.21s/it][A

loss: 


 33%|███▎      | 27/83 [09:44<19:44, 21.15s/it][A

tensor(0.5022, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [10:05<19:22, 21.13s/it][A

tensor(0.7824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:26<19:02, 21.15s/it][A

tensor(0.5230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:48<18:46, 21.26s/it][A

tensor(0.5919, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:09<18:28, 21.32s/it][A

tensor(0.7351, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:31<18:08, 21.35s/it][A

tensor(0.6641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:52<17:49, 21.38s/it][A

tensor(0.6538, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:14<17:36, 21.55s/it][A

tensor(0.6270, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:36<17:21, 21.70s/it][A

tensor(0.5386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:58<16:56, 21.64s/it][A

tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:19<16:34, 21.61s/it][A

tensor(0.4819, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:40<16:09, 21.54s/it][A

tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [14:02<15:44, 21.46s/it][A

tensor(0.5929, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:23<15:21, 21.43s/it][A

tensor(0.4595, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.7262, device='cuda:0', grad_fn=<NllLossBackward>)


 49%|████▉     | 41/83 [14:44<14:59, 21.42s/it][A

loss: 


 51%|█████     | 42/83 [15:06<14:37, 21.41s/it][A

tensor(0.7652, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [15:27<14:15, 21.39s/it][A

loss: tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:49<13:54, 21.40s/it][A

tensor(0.6513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:10<13:34, 21.42s/it][A

tensor(0.6075, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [16:31<13:11, 21.40s/it][A

loss: tensor(0.7017, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:53<12:48, 21.34s/it][A

tensor(0.7855, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [17:14<12:26, 21.33s/it][A

loss: tensor(0.5833, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [17:35<12:03, 21.28s/it][A

loss: tensor(0.5908, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:56<11:40, 21.22s/it][A

loss: tensor(0.7043, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [18:17<11:19, 21.22s/it][A

loss: tensor(0.6713, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [18:39<10:59, 21.28s/it][A

tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:01<10:46, 21.54s/it][A

tensor(0.5918, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:23<10:28, 21.67s/it][A

tensor(0.7609, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:45<10:09, 21.76s/it][A

tensor(0.7631, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [20:07<09:48, 21.79s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:28<09:23, 21.68s/it][A

tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:50<08:59, 21.58s/it][A

loss: tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [21:11<08:38, 21.60s/it][A

loss: tensor(0.5545, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [21:33<08:17, 21.65s/it][A

loss: tensor(0.7319, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:55<07:55, 21.62s/it][A

tensor(0.7278, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:16<07:35, 21.71s/it][A

tensor(0.7807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:38<07:15, 21.75s/it][A

tensor(0.6712, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [23:00<06:53, 21.78s/it][A

tensor(0.6574, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [23:22<06:32, 21.81s/it][A

tensor(0.5988, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:44<06:11, 21.84s/it][A

tensor(0.6443, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [24:06<05:49, 21.86s/it][A

loss: tensor(0.6726, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [24:28<05:28, 21.89s/it][A

tensor(0.6731, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:50<05:06, 21.90s/it][A

tensor(0.7182, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:12<04:44, 21.89s/it][A

tensor(0.5902, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:33<04:22, 21.88s/it][A

tensor(0.5319, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:55<04:00, 21.88s/it][A

tensor(0.6543, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [26:17<03:39, 21.91s/it][A

tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [26:39<03:17, 21.89s/it][A

loss: tensor(0.5681, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [27:01<02:55, 21.88s/it][A

loss: tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [27:23<02:32, 21.85s/it][A

loss: tensor(0.6145, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [27:44<02:10, 21.75s/it][A

loss: tensor(0.6611, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [28:06<01:48, 21.69s/it][A

loss: tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:27<01:26, 21.64s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:49<01:04, 21.61s/it][A

tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [29:10<00:43, 21.59s/it][A

tensor(0.6185, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:32<00:21, 21.68s/it][A

tensor(0.6822, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:40<00:00, 21.45s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5845, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6378881977983268

	train acc: 0.6392730010952903

	training prec: 0.8474003840705832

	training rec: 0.6392730010952903

	training f1: 0.6866473439495172

	Current Learning rate:  0.0009428571428571429



  2%|▏         | 1/42 [00:02<01:54,  2.80s/it][A
  5%|▍         | 2/42 [00:05<01:50,  2.76s/it][A
  7%|▋         | 3/42 [00:08<01:48,  2.77s/it][A
 10%|▉         | 4/42 [00:11<01:44,  2.75s/it][A
 12%|█▏        | 5/42 [00:13<01:41,  2.75s/it][A
 14%|█▍        | 6/42 [00:16<01:38,  2.75s/it][A
 17%|█▋        | 7/42 [00:19<01:36,  2.76s/it][A
 19%|█▉        | 8/42 [00:22<01:33,  2.76s/it][A
 21%|██▏       | 9/42 [00:24<01:31,  2.77s/it][A
 24%|██▍       | 10/42 [00:27<01:29,  2.78s/it][A
 26%|██▌       | 11/42 [00:30<01:26,  2.78s/it][A
 29%|██▊       | 12/42 [00:33<01:23,  2.78s/it][A
 31%|███       | 13/42 [00:36<01:20,  2.79s/it][A
 33%|███▎      | 14/42 [00:38<01:17,  2.78s/it][A
 36%|███▌      | 15/42 [00:41<01:15,  2.78s/it][A
 38%|███▊      | 16/42 [00:44<01:12,  2.80s/it][A
 40%|████      | 17/42 [00:47<01:09,  2.78s/it][A
 43%|████▎     | 18/42 [00:49<01:06,  2.78s/it][A
 45%|████▌     | 19/42 [00:52<01:04,  2.79s/it][A
 48%|████▊     | 20/42 [00:55<01:00,  2


	Validation loss: 0.6014868099065054

	Validation acc: 0.6056547619047619

	Validation prec: 0.8631926170264066

	Validation rec: 0.6056547619047619

	Validation f1: 0.6727914586044955
loss: 


  1%|          | 1/83 [00:21<29:32, 21.61s/it][A

tensor(0.5821, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:43<29:09, 21.59s/it][A

tensor(0.5614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:04<28:45, 21.57s/it][A

tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:26<28:27, 21.62s/it][A

tensor(0.6070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:48<28:07, 21.63s/it][A

tensor(0.7548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:09<27:43, 21.61s/it][A

tensor(0.5600, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:31<27:20, 21.58s/it][A

tensor(0.5510, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:52<26:52, 21.50s/it][A

loss: tensor(0.6856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:13<26:22, 21.39s/it][A

tensor(0.6404, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:34<25:57, 21.34s/it][A

loss: tensor(0.6037, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:56<25:33, 21.30s/it][A

tensor(0.6472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:17<25:11, 21.29s/it][A

tensor(0.5229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:38<24:48, 21.27s/it][A

tensor(0.5242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:59<24:27, 21.27s/it][A

tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:21<24:12, 21.36s/it][A

loss: tensor(0.5514, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:43<23:56, 21.44s/it][A

tensor(0.5275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:04<23:42, 21.55s/it][A

tensor(0.5632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:26<23:23, 21.59s/it][A

tensor(0.6136, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:48<23:02, 21.60s/it][A

tensor(0.7147, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:09<22:42, 21.62s/it][A

tensor(0.5917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:31<22:21, 21.64s/it][A

tensor(0.6537, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:52<21:56, 21.58s/it][A

tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [08:14<21:34, 21.57s/it][A

loss: tensor(0.5297, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:36<21:14, 21.60s/it][A

loss: tensor(0.5415, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:57<20:53, 21.61s/it][A

loss: tensor(0.5601, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:19<20:33, 21.65s/it][A

tensor(0.5902, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:41<20:14, 21.68s/it][A

tensor(0.5014, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [10:03<19:53, 21.70s/it][A

loss: tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [10:24<19:30, 21.68s/it][A

loss: tensor(0.5864, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:46<19:10, 21.70s/it][A

tensor(0.7518, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [11:08<18:46, 21.67s/it][A

loss: tensor(0.6291, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:29<18:27, 21.71s/it][A

tensor(0.6418, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:51<18:08, 21.76s/it][A

tensor(0.6118, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:13<17:44, 21.72s/it][A

tensor(0.5169, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:34<17:21, 21.70s/it][A

tensor(0.5139, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:56<16:59, 21.70s/it][A

tensor(0.7267, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [13:18<16:36, 21.66s/it][A

tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:40<16:16, 21.69s/it][A

tensor(0.6196, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [14:01<15:54, 21.70s/it][A

loss: tensor(0.4812, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [14:23<15:32, 21.68s/it][A

tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:45<15:10, 21.68s/it][A

loss: tensor(0.5571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [15:06<14:47, 21.64s/it][A

tensor(0.5854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:27<14:22, 21.56s/it][A

tensor(0.5663, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:49<13:59, 21.53s/it][A

tensor(0.5268, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [16:10<13:37, 21.50s/it][A

tensor(0.6186, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [16:32<13:17, 21.55s/it][A

tensor(0.5443, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:54<12:57, 21.60s/it][A

loss: tensor(0.7103, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [17:15<12:36, 21.61s/it][A

tensor(0.6417, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:37<12:12, 21.54s/it][A

tensor(0.5909, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:58<11:51, 21.55s/it][A

tensor(0.6846, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [18:20<11:30, 21.57s/it][A

tensor(0.5287, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:42<11:08, 21.57s/it][A

loss: tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [19:03<10:48, 21.61s/it][A

tensor(0.6136, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [19:25<10:27, 21.64s/it][A

tensor(0.7142, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:47<10:06, 21.66s/it][A

tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [20:08<09:45, 21.68s/it][A

loss: tensor(0.5592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [20:30<09:19, 21.52s/it][A

tensor(0.5171, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:51<08:55, 21.42s/it][A

loss: tensor(0.7451, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [21:12<08:31, 21.32s/it][A

loss: tensor(0.4616, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [21:33<08:08, 21.25s/it][A

loss: tensor(0.9449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:54<07:48, 21.28s/it][A

tensor(0.7377, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [22:16<07:27, 21.31s/it][A

tensor(0.7035, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [22:37<07:07, 21.36s/it][A

loss: tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:58<06:46, 21.37s/it][A

loss: tensor(0.5602, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [23:20<06:23, 21.29s/it][A

loss: tensor(0.6122, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:41<06:01, 21.24s/it][A

tensor(0.7065, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [24:02<05:38, 21.18s/it][A

loss: tensor(0.7068, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [24:23<05:16, 21.13s/it][A

loss: tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:44<04:55, 21.10s/it][A

tensor(0.6395, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [25:05<04:35, 21.20s/it][A

tensor(0.5745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:26<04:13, 21.17s/it][A

tensor(0.5058, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [25:47<03:51, 21.04s/it][A

loss: tensor(0.5370, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [26:08<03:29, 20.94s/it][A

loss: tensor(0.6633, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [26:28<03:07, 20.84s/it][A

loss: tensor(0.7051, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [26:49<02:46, 20.82s/it][A

loss: tensor(0.6155, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [27:10<02:25, 20.86s/it][A

tensor(0.7012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:31<02:05, 20.90s/it][A

tensor(0.4848, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:52<01:44, 20.89s/it][A

tensor(0.5241, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [28:13<01:23, 20.92s/it][A

tensor(0.6652, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:34<01:02, 20.94s/it][A

tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:55<00:41, 20.94s/it][A

tensor(0.6812, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:16<00:20, 20.90s/it][A

tensor(0.7175, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:23<00:00, 21.25s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6806, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6176137529223799

	train acc: 0.6519201807228916

	training prec: 0.8513011438677046

	training rec: 0.6519201807228916

	training f1: 0.7064385554777711

	Current Learning rate:  0.0009142857142857143



  2%|▏         | 1/42 [00:02<01:48,  2.65s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.69s/it][A
  7%|▋         | 3/42 [00:08<01:44,  2.68s/it][A
 10%|▉         | 4/42 [00:10<01:42,  2.69s/it][A
 12%|█▏        | 5/42 [00:13<01:39,  2.69s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.67s/it][A
 19%|█▉        | 8/42 [00:21<01:31,  2.68s/it][A
 21%|██▏       | 9/42 [00:24<01:27,  2.67s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.67s/it][A
 26%|██▌       | 11/42 [00:29<01:23,  2.68s/it][A
 29%|██▊       | 12/42 [00:32<01:20,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.68s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.67s/it][A
 36%|███▌      | 15/42 [00:40<01:11,  2.66s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.67s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.67s/it][A
 43%|████▎     | 18/42 [00:48<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.65s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6088322344280425

	Validation acc: 0.6577380952380952

	Validation prec: 0.8549183445052809

	Validation rec: 0.6577380952380952

	Validation f1: 0.7168923329333406



  1%|          | 1/83 [00:20<28:07, 20.58s/it][A

loss: tensor(0.6036, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:45, 20.56s/it][A

tensor(0.6334, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:28, 20.60s/it][A

tensor(0.5679, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:12, 20.67s/it][A

tensor(0.6292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:53, 20.69s/it][A

tensor(0.5459, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:35, 20.72s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:14, 20.72s/it][A

tensor(0.7035, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:52, 20.71s/it][A

tensor(0.5382, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:31, 20.70s/it][A

tensor(0.5669, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:26<25:13, 20.74s/it][A

tensor(0.5274, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:51, 20.72s/it][A

tensor(0.5686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:28, 20.69s/it][A

tensor(0.6554, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:28<24:07, 20.68s/it][A

tensor(0.5712, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:49<23:47, 20.69s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:26, 20.69s/it][A

tensor(0.5687, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:30<23:03, 20.65s/it][A

tensor(0.4963, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:51<22:41, 20.63s/it][A

loss: tensor(0.5567, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:20, 20.62s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:32<21:59, 20.61s/it][A

tensor(0.6054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:53<21:38, 20.61s/it][A

tensor(0.5948, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:14<21:21, 20.67s/it][A

tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:34<20:58, 20.64s/it][A

loss: tensor(0.6369, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:55<20:37, 20.62s/it][A

tensor(0.6710, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:15<20:16, 20.62s/it][A

tensor(0.5944, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:36<19:56, 20.64s/it][A

tensor(0.5549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:57<19:38, 20.68s/it][A

tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:17<19:19, 20.70s/it][A

tensor(0.6013, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:38<19:00, 20.74s/it][A

tensor(0.6342, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:59<18:39, 20.73s/it][A

tensor(0.5935, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:20<18:17, 20.72s/it][A

tensor(0.6315, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:40<17:54, 20.66s/it][A

tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:01<17:33, 20.65s/it][A

loss: tensor(0.5999, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:21<17:11, 20.63s/it][A

tensor(0.4880, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:42<16:51, 20.63s/it][A

tensor(0.6167, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:03<16:30, 20.64s/it][A

tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:23<16:11, 20.66s/it][A

tensor(0.5135, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:44<15:50, 20.66s/it][A

tensor(0.7255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:05<15:29, 20.65s/it][A

tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:25<15:07, 20.63s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:46<14:48, 20.65s/it][A

tensor(0.5436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:07<14:27, 20.65s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:27<14:07, 20.67s/it][A

tensor(0.5845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:48<13:47, 20.68s/it][A
 53%|█████▎    | 44/83 [15:09<13:26, 20.68s/it][A

loss: tensor(0.5104, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:29<13:06, 20.69s/it][A

tensor(0.5597, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:50<12:45, 20.70s/it][A

tensor(0.5950, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6862, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:11<12:23, 20.66s/it][A

loss: 


 58%|█████▊    | 48/83 [16:32<12:04, 20.69s/it][A

tensor(0.6568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:52<11:43, 20.70s/it][A

tensor(0.6305, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:13<11:23, 20.71s/it][A

tensor(0.5924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:34<11:04, 20.77s/it][A

tensor(0.5416, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [17:54<10:42, 20.71s/it][A

loss: tensor(0.5147, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:15<10:21, 20.71s/it][A

tensor(0.6519, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:36<09:59, 20.67s/it][A

loss: tensor(0.7775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:57<09:40, 20.72s/it][A

tensor(0.6010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:17<09:18, 20.70s/it][A

tensor(0.5881, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5671, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:38<08:58, 20.72s/it][A

loss: 


 70%|██████▉   | 58/83 [19:59<08:38, 20.73s/it][A

tensor(0.5523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:19<08:17, 20.71s/it][A

tensor(0.5649, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:40<07:56, 20.71s/it][A

loss: tensor(0.6256, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [21:01<07:35, 20.70s/it][A

loss: tensor(0.5916, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:21<07:13, 20.65s/it][A

loss: tensor(0.7423, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:42<06:52, 20.65s/it][A

loss: tensor(0.5996, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:03<06:31, 20.61s/it][A

loss: tensor(0.7302, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:23<06:12, 20.71s/it][A

tensor(0.5934, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:44<05:51, 20.70s/it][A

tensor(0.5837, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:05<05:31, 20.71s/it][A

tensor(0.6253, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:26<05:10, 20.71s/it][A

loss: tensor(0.5833, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [23:46<04:49, 20.70s/it][A

loss: tensor(0.6810, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:07<04:28, 20.68s/it][A

tensor(0.4551, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:28<04:08, 20.67s/it][A

tensor(0.5326, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:48<03:47, 20.67s/it][A

tensor(0.6428, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:09<03:27, 20.70s/it][A

tensor(0.5855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:30<03:06, 20.71s/it][A

tensor(0.5336, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:50<02:45, 20.73s/it][A

tensor(0.7053, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [26:11<02:25, 20.73s/it][A

loss: tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:32<02:04, 20.77s/it][A

tensor(0.5444, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:53<01:43, 20.78s/it][A

tensor(0.5880, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:14<01:23, 20.79s/it][A

tensor(0.7614, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:34<01:02, 20.78s/it][A

tensor(0.6589, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [27:55<00:41, 20.75s/it][A

loss: tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:16<00:20, 20.76s/it][A

tensor(0.6576, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:23<00:00, 20.52s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4301, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6053731743111668

	train acc: 0.6603059967141293

	training prec: 0.8481167042201286

	training rec: 0.6603059967141293

	training f1: 0.7154723019614018

	Current Learning rate:  0.0008857142857142857



  2%|▏         | 1/42 [00:02<01:48,  2.65s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.64s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.65s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.592734062955493

	Validation acc: 0.6783234126984127

	Validation prec: 0.8603534821614073

	Validation rec: 0.6783234126984127

	Validation f1: 0.7365182327613533
loss: 


  1%|          | 1/83 [00:20<28:29, 20.85s/it][A

tensor(0.6776, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:03, 20.78s/it][A

tensor(0.5038, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:02<27:36, 20.71s/it][A

loss: tensor(0.6009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:17, 20.72s/it][A

tensor(0.5136, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:56, 20.72s/it][A

tensor(0.7427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:36, 20.74s/it][A

tensor(0.5329, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:15, 20.73s/it][A

tensor(0.5668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:50, 20.68s/it][A

tensor(0.5647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:29, 20.67s/it][A

tensor(0.4431, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:26<25:06, 20.63s/it][A

tensor(0.6164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:44, 20.61s/it][A

tensor(0.6282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:23, 20.61s/it][A

tensor(0.6104, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:28<24:00, 20.59s/it][A

tensor(0.7134, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:49<23:41, 20.60s/it][A

tensor(0.6837, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:09<23:23, 20.63s/it][A

tensor(0.5211, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:30<23:02, 20.63s/it][A

tensor(0.6236, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:51<22:42, 20.64s/it][A

tensor(0.5501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:11<22:22, 20.65s/it][A

tensor(0.5105, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:32<22:00, 20.63s/it][A

tensor(0.9686, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:53<21:38, 20.61s/it][A

tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:13<21:16, 20.59s/it][A

tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:34<20:55, 20.58s/it][A

tensor(0.7720, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:54<20:33, 20.56s/it][A

loss: tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:15<20:13, 20.58s/it][A

tensor(0.6716, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:35<19:54, 20.59s/it][A

tensor(0.5387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:56<19:32, 20.57s/it][A

tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:16<19:10, 20.54s/it][A

tensor(0.6233, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:37<18:48, 20.52s/it][A

loss: tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:57<18:28, 20.54s/it][A

tensor(0.6868, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:18<18:09, 20.55s/it][A

loss: tensor(0.5662, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:39<17:48, 20.55s/it][A

tensor(0.6539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:59<17:27, 20.54s/it][A

tensor(0.8096, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:20<17:06, 20.52s/it]

loss: tensor(0.6140, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 41%|████      | 34/83 [11:40<16:45, 20.52s/it][A

tensor(0.6073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:01<16:23, 20.50s/it][A

tensor(0.5795, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:21<16:04, 20.51s/it][A

tensor(0.6957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7936, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:42<15:44, 20.53s/it][A

loss: 


 46%|████▌     | 38/83 [13:02<15:24, 20.55s/it][A

tensor(0.6308, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:23<15:04, 20.56s/it][A

tensor(0.5785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:43<14:44, 20.58s/it][A

tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:04<14:27, 20.65s/it][A

loss: tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:25<14:09, 20.71s/it][A

loss: tensor(0.6298, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:46<13:49, 20.73s/it][A

tensor(0.4641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:07<13:28, 20.74s/it][A

tensor(0.5649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:28<13:10, 20.80s/it][A

tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:49<12:51, 20.86s/it][A

tensor(0.6752, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:10<12:31, 20.88s/it][A

tensor(0.5270, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:30<12:10, 20.88s/it][A

tensor(0.5545, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:51<11:49, 20.88s/it][A

tensor(0.6598, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:12<11:28, 20.87s/it][A

tensor(0.7503, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:33<11:06, 20.83s/it][A

loss: tensor(0.6337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:54<10:44, 20.80s/it][A

tensor(0.6207, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:14<10:23, 20.79s/it][A

loss: tensor(0.6072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:35<10:02, 20.78s/it][A

tensor(0.4548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:56<09:41, 20.76s/it][A

tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:17<09:19, 20.73s/it][A

tensor(0.5327, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:37<08:58, 20.72s/it][A

loss: tensor(0.7636, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.4831, device='cuda:0', grad_fn=<NllLossBackward>)


 70%|██████▉   | 58/83 [19:58<08:37, 20.70s/it][A
 71%|███████   | 59/83 [20:18<08:15, 20.64s/it][A

loss: tensor(0.6302, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:39<07:55, 20.67s/it][A

tensor(0.5513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:00<07:34, 20.68s/it][A

tensor(0.5521, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:20<07:12, 20.61s/it][A

loss: tensor(0.6786, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:41<06:51, 20.58s/it][A

loss: tensor(0.6779, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:01<06:30, 20.55s/it][A

loss: tensor(0.7573, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:22<06:09, 20.53s/it][A

loss: tensor(0.6312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:42<05:49, 20.57s/it][A

tensor(0.6706, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:03<05:28, 20.56s/it][A

tensor(0.6049, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:24<05:08, 20.58s/it][A

tensor(0.6821, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:44<04:47, 20.57s/it][A

tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:05<04:27, 20.60s/it][A

loss: tensor(0.6441, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:25<04:06, 20.56s/it][A

loss: tensor(0.5941, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:46<03:46, 20.59s/it][A

tensor(0.6753, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:07<03:26, 20.66s/it][A

tensor(0.5447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:28<03:06, 20.70s/it][A

tensor(0.5661, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [25:48<02:45, 20.72s/it][A

loss: 


 92%|█████████▏| 76/83 [26:09<02:25, 20.75s/it][A

tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:30<02:04, 20.78s/it][A

tensor(0.5984, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [26:51<01:43, 20.75s/it][A

loss: tensor(0.5559, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:11<01:22, 20.73s/it][A

loss: tensor(0.5771, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:32<01:02, 20.76s/it][A

tensor(0.5721, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:53<00:41, 20.77s/it][A

tensor(0.5165, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6990, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:14<00:20, 20.77s/it][A
100%|██████████| 83/83 [28:21<00:00, 20.50s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6580, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6228550712028181

	train acc: 0.6591593647316539

	training prec: 0.848748596337971

	training rec: 0.6591593647316539

	training f1: 0.7114866220226861

	Current Learning rate:  0.0008571428571428571



  2%|▏         | 1/42 [00:02<01:50,  2.70s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:08<01:44,  2.67s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.67s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.67s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.67s/it][A
 29%|██▊       | 12/42 [00:31<01:20,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.68s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.66s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.67s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:04,  2.67s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6159521043300629

	Validation acc: 0.7653769841269842

	Validation prec: 0.8516350039810073

	Validation rec: 0.7653769841269842

	Validation f1: 0.7980485407838971



  1%|          | 1/83 [00:20<28:23, 20.77s/it][A

loss: tensor(0.5555, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:05, 20.80s/it][A

tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:40, 20.76s/it][A

tensor(0.6118, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:22<27:16, 20.72s/it][A

loss: tensor(0.6459, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:59, 20.77s/it][A

tensor(0.5824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:38, 20.76s/it][A

tensor(0.5623, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:17, 20.76s/it][A

tensor(0.5787, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:00, 20.80s/it][A

tensor(0.5054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:40, 20.81s/it][A

tensor(0.5122, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7509, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:27<25:21, 20.84s/it][A

loss: 


 13%|█▎        | 11/83 [03:48<24:59, 20.82s/it][A

tensor(0.4976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6142, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:09<24:37, 20.81s/it][A

loss: tensor(0.6380, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:30<24:16, 20.81s/it][A

loss: 


 17%|█▋        | 14/83 [04:50<23:52, 20.76s/it][A

tensor(0.5603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:11<23:27, 20.70s/it][A

tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:32<23:03, 20.65s/it][A

tensor(0.6532, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:52<22:40, 20.61s/it][A

tensor(0.7064, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:13<22:20, 20.63s/it][A

loss: tensor(0.6296, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:33<21:59, 20.62s/it][A

tensor(0.5742, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:54<21:40, 20.64s/it][A

tensor(0.6105, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:15<21:20, 20.65s/it][A

tensor(0.6152, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:35<20:59, 20.65s/it][A

tensor(0.4877, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:56<20:41, 20.69s/it][A

tensor(0.5892, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:17<20:21, 20.70s/it][A

tensor(0.6950, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:38<20:01, 20.72s/it][A

loss: 


 31%|███▏      | 26/83 [08:58<19:38, 20.67s/it][A

tensor(0.6434, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:19<19:17, 20.66s/it][A

tensor(0.4742, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:39<18:55, 20.65s/it][A

tensor(0.6082, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:00<18:34, 20.65s/it][A

tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:21<18:13, 20.63s/it][A

loss: tensor(0.6008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:41<17:54, 20.66s/it][A

tensor(0.6652, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:02<17:34, 20.69s/it][A

tensor(0.6077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:23<17:16, 20.72s/it][A

tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:44<16:54, 20.70s/it][A

tensor(0.6709, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5863, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:04<16:34, 20.71s/it][A

loss: 


 43%|████▎     | 36/83 [12:25<16:14, 20.74s/it][A

tensor(0.6241, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:46<15:54, 20.74s/it][A

loss: tensor(0.5038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:07<15:31, 20.70s/it][A

tensor(0.6624, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:27<15:13, 20.75s/it][A

tensor(0.6996, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:48<14:51, 20.73s/it][A

tensor(0.6267, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:09<14:30, 20.72s/it][A

loss: tensor(0.6927, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7032, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:29<14:08, 20.69s/it][A

loss: 


 52%|█████▏    | 43/83 [14:50<13:46, 20.66s/it][A

tensor(0.6095, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:11<13:24, 20.63s/it][A

loss: tensor(0.7137, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:31<13:04, 20.63s/it][A

loss: tensor(0.6939, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:52<12:42, 20.61s/it][A

tensor(0.6189, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:12<12:21, 20.60s/it][A

loss: tensor(0.6558, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:33<12:00, 20.60s/it][A

tensor(0.7371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:54<11:41, 20.62s/it][A

tensor(0.6297, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:14<11:20, 20.63s/it][A

tensor(0.5862, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6478, device='cuda:0', grad_fn=<NllLossBackward>)


 61%|██████▏   | 51/83 [17:35<11:00, 20.65s/it][A

loss: 


 63%|██████▎   | 52/83 [17:56<10:40, 20.65s/it][A

tensor(0.6424, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:16<10:19, 20.66s/it][A

loss: tensor(0.6152, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:37<09:58, 20.64s/it][A

tensor(0.7067, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:57<09:37, 20.62s/it][A

tensor(0.5359, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:18<09:17, 20.64s/it][A

tensor(0.6386, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:39<08:54, 20.57s/it][A

loss: tensor(0.5283, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [19:59<08:33, 20.55s/it][A

loss: tensor(0.7050, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:20<08:13, 20.54s/it][A

loss: tensor(0.6376, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:40<07:51, 20.51s/it][A

loss: tensor(0.5517, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:01<07:31, 20.53s/it][A

tensor(0.5952, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:21<07:11, 20.56s/it]

loss: tensor(0.6517, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 76%|███████▌  | 63/83 [21:42<06:52, 20.61s/it][A

tensor(0.6377, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:03<06:31, 20.62s/it][A

tensor(0.5508, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:23<06:10, 20.61s/it][A

loss: tensor(0.5830, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:44<05:50, 20.61s/it][A

tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:04<05:29, 20.62s/it][A

tensor(0.7458, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:25<05:09, 20.63s/it][A

tensor(0.5507, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:46<04:48, 20.62s/it][A

tensor(0.7728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:06<04:28, 20.64s/it][A

tensor(0.6232, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:27<04:08, 20.68s/it][A

tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [24:48<03:47, 20.67s/it][A

loss: tensor(0.5133, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:08<03:26, 20.66s/it][A

tensor(0.5801, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:29<03:05, 20.66s/it][A

tensor(0.7656, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:50<02:45, 20.65s/it][A

tensor(0.4296, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:10<02:24, 20.61s/it][A

tensor(0.5708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:31<02:03, 20.66s/it][A

tensor(0.7111, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [26:52<01:43, 20.70s/it]

loss: tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 95%|█████████▌| 79/83 [27:13<01:22, 20.72s/it][A

tensor(0.6678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:33<01:02, 20.77s/it][A

tensor(0.5939, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:54<00:41, 20.79s/it][A

tensor(0.5514, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:15<00:20, 20.77s/it][A

loss: tensor(0.5364, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:22<00:00, 20.51s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5372, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6181827959526016

	train acc: 0.6599808324205915

	training prec: 0.8460035431380969

	training rec: 0.6599808324205915

	training f1: 0.7143893452809271

	Current Learning rate:  0.0008285714285714286



  2%|▏         | 1/42 [00:02<01:48,  2.64s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.67s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.67s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.67s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.67s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.68s/it][A
 19%|█▉        | 8/42 [00:21<01:31,  2.69s/it][A
 21%|██▏       | 9/42 [00:24<01:28,  2.68s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.68s/it][A
 26%|██▌       | 11/42 [00:29<01:23,  2.68s/it][A
 29%|██▊       | 12/42 [00:32<01:20,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.67s/it][A
 33%|███▎      | 14/42 [00:37<01:15,  2.68s/it][A
 36%|███▌      | 15/42 [00:40<01:11,  2.67s/it][A
 38%|███▊      | 16/42 [00:42<01:10,  2.71s/it][A
 40%|████      | 17/42 [00:45<01:07,  2.71s/it][A
 43%|████▎     | 18/42 [00:48<01:04,  2.69s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.69s/it][A
 48%|████▊     | 20/42 [00:53<00:59,  2


	Validation loss: 0.6308652219318208

	Validation acc: 0.5305059523809523

	Validation prec: 0.8711581957815986

	Validation rec: 0.5305059523809523

	Validation f1: 0.6085440669178694
loss: 


  1%|          | 1/83 [00:20<28:29, 20.84s/it][A

tensor(0.5369, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:09, 20.86s/it][A

tensor(0.5530, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:02<27:47, 20.85s/it][A

loss: tensor(0.5877, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:27, 20.85s/it][A

tensor(0.6062, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:44<27:03, 20.81s/it][A

loss: tensor(0.5481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:05<26:47, 20.87s/it][A

tensor(0.6561, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:27, 20.89s/it][A

tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<26:05, 20.87s/it][A

tensor(0.6093, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:07<25:44, 20.87s/it][A

tensor(0.6803, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:23, 20.87s/it][A

tensor(0.6479, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:49<25:02, 20.87s/it][A

loss: tensor(0.5487, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:40, 20.86s/it][A

tensor(0.5365, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:22, 20.90s/it][A

tensor(0.6055, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:52<24:00, 20.87s/it][A

loss: tensor(0.5314, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:12<23:37, 20.84s/it][A

tensor(0.8427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:33<23:17, 20.85s/it][A

tensor(0.5307, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:54<22:56, 20.86s/it][A

tensor(0.7500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:15<22:35, 20.85s/it][A

tensor(0.7089, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:36<22:14, 20.86s/it][A

tensor(0.6193, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:57<21:50, 20.81s/it][A

loss: tensor(0.7230, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:17<21:27, 20.77s/it][A

loss: tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:38<21:06, 20.77s/it][A

tensor(0.6853, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:59<20:48, 20.82s/it][A

loss: tensor(0.6631, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:20<20:31, 20.87s/it][A

tensor(0.5709, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:41<20:09, 20.85s/it][A

tensor(0.6131, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:02<19:48, 20.85s/it][A

tensor(0.5450, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:22<19:27, 20.84s/it][A

loss: tensor(0.6230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:43<19:07, 20.86s/it][A

tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:04<18:44, 20.83s/it][A

tensor(0.6346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:25<18:24, 20.84s/it][A

tensor(0.7371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:46<18:02, 20.82s/it][A

tensor(0.5938, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:07<17:42, 20.83s/it][A

tensor(0.6349, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:27<17:20, 20.82s/it][A

loss: tensor(0.4999, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:48<16:57, 20.77s/it][A

loss: tensor(0.6230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:09<16:37, 20.78s/it][A

tensor(0.5509, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:29<16:15, 20.75s/it][A

tensor(0.6217, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:50<15:54, 20.74s/it][A

loss: tensor(0.6007, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:11<15:30, 20.69s/it][A

tensor(0.5897, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:31<15:08, 20.64s/it][A

loss: tensor(0.5785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:52<14:46, 20.60s/it][A

tensor(0.6199, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:12<14:24, 20.59s/it][A

tensor(0.6072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:33<14:04, 20.59s/it][A

tensor(0.5315, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:54<13:43, 20.59s/it][A

loss: tensor(0.6947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:14<13:22, 20.58s/it][A

tensor(0.5220, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:35<13:01, 20.58s/it][A

loss: tensor(0.5766, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:55<12:41, 20.59s/it][A

tensor(0.6427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:16<12:21, 20.60s/it][A

tensor(0.7254, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6035, device='cuda:0', grad_fn=<NllLossBackward>)


 58%|█████▊    | 48/83 [16:37<12:02, 20.63s/it][A

loss: 


 59%|█████▉    | 49/83 [16:57<11:41, 20.64s/it][A

tensor(0.6892, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:18<11:21, 20.64s/it][A

tensor(0.6489, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:39<11:01, 20.68s/it][A

tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:59<10:40, 20.67s/it][A

tensor(0.5166, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:20<10:19, 20.64s/it]

loss: tensor(0.6186, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 65%|██████▌   | 54/83 [18:41<09:58, 20.63s/it][A

tensor(0.4983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:01<09:36, 20.59s/it][A

tensor(0.5240, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:22<09:16, 20.62s/it][A

loss: tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:42<08:55, 20.59s/it][A

loss: tensor(0.4711, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:03<08:34, 20.56s/it][A

loss: tensor(0.7307, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:23<08:13, 20.58s/it][A

tensor(0.5619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:44<07:53, 20.59s/it][A

loss: 


 73%|███████▎  | 61/83 [21:05<07:32, 20.59s/it][A

tensor(0.5499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:25<07:12, 20.60s/it][A

tensor(0.5038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:46<06:52, 20.62s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:07<06:32, 20.64s/it][A

tensor(0.5854, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:27<06:11, 20.66s/it][A

loss: tensor(0.7071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.4945, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [22:48<05:51, 20.65s/it][A

loss: 


 81%|████████  | 67/83 [23:09<05:31, 20.71s/it][A

tensor(0.6476, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:29<05:10, 20.68s/it][A

tensor(0.4878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:50<04:49, 20.66s/it][A

tensor(0.4878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6074, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:11<04:29, 20.70s/it][A

loss: 


 86%|████████▌ | 71/83 [24:32<04:09, 20.78s/it][A

tensor(0.5416, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:52<03:48, 20.78s/it][A

tensor(0.5608, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:13<03:27, 20.73s/it][A

loss: tensor(0.7690, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:34<03:06, 20.69s/it][A

tensor(0.5403, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [25:54<02:45, 20.65s/it][A

loss: tensor(0.5701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:15<02:24, 20.64s/it][A

tensor(0.6995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:35<02:03, 20.63s/it][A

tensor(0.5647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:56<01:43, 20.64s/it][A

tensor(0.4653, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:17<01:22, 20.65s/it][A

tensor(0.6341, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:37<01:01, 20.66s/it][A

tensor(0.6692, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:58<00:41, 20.65s/it][A

tensor(0.5631, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:19<00:20, 20.68s/it][A

loss: tensor(0.6963, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:26<00:00, 20.56s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6058344047471701

	train acc: 0.6546584063526835

	training prec: 0.8518815214294228

	training rec: 0.6546584063526835

	training f1: 0.7078455712108213

	Current Learning rate:  0.0008



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.65s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.64s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.63s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.64s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.63s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.63s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.64s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6332820916459674

	Validation acc: 0.761656746031746

	Validation prec: 0.8414441605779753

	Validation rec: 0.761656746031746

	Validation f1: 0.7904929901469838
loss: 


  1%|          | 1/83 [00:20<28:06, 20.57s/it][A

tensor(0.6417, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:41<27:44, 20.55s/it][A

loss: tensor(0.5131, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:01<27:23, 20.55s/it][A

loss: tensor(0.5998, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:13, 20.67s/it][A

tensor(0.5495, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:56, 20.73s/it][A

tensor(0.5725, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:38, 20.77s/it][A

tensor(0.6002, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:16, 20.75s/it][A

tensor(0.5098, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5436, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:45<25:55, 20.74s/it][A

loss: 


 11%|█         | 9/83 [03:06<25:35, 20.75s/it][A

tensor(0.4801, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:27<25:12, 20.72s/it][A

tensor(0.6445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:50, 20.70s/it][A

tensor(0.5217, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:27, 20.67s/it][A

tensor(0.6163, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:08, 20.69s/it][A

tensor(0.5171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:49<23:44, 20.65s/it][A

tensor(0.6879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:26, 20.68s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:31<23:06, 20.70s/it][A

loss: tensor(0.5590, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:51<22:44, 20.67s/it][A

tensor(0.6107, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:23, 20.66s/it][A

tensor(0.5957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:33<22:02, 20.67s/it][A

tensor(0.5755, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:53<21:41, 20.66s/it][A

tensor(0.5300, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:14<21:24, 20.72s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:35<21:00, 20.67s/it][A

loss: tensor(0.4978, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:55<20:40, 20.67s/it][A

loss: tensor(0.7721, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:16<20:18, 20.65s/it][A

tensor(0.6589, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5913, device='cuda:0', grad_fn=<NllLossBackward>)


 30%|███       | 25/83 [08:37<19:58, 20.66s/it][A

loss: 


 31%|███▏      | 26/83 [08:57<19:36, 20.63s/it][A

tensor(0.6578, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:18<19:16, 20.66s/it][A

tensor(0.5826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:38<18:54, 20.64s/it][A

tensor(0.6315, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:59<18:34, 20.64s/it][A

tensor(0.5479, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:20<18:15, 20.67s/it][A

tensor(0.4678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:40<17:54, 20.67s/it][A

tensor(0.6446, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:01<17:32, 20.63s/it][A

loss: tensor(0.5264, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:22<17:11, 20.64s/it][A

loss: tensor(0.7033, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:42<16:50, 20.62s/it][A

tensor(0.7434, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:03<16:29, 20.62s/it][A

tensor(0.7387, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:23<16:07, 20.58s/it][A

loss: tensor(0.7484, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:44<15:46, 20.58s/it][A

loss: tensor(0.4622, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:04<15:24, 20.55s/it][A

loss: tensor(0.5864, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:25<15:04, 20.56s/it][A

loss: tensor(0.5523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:46<14:47, 20.64s/it][A

tensor(0.5673, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6009, device='cuda:0', grad_fn=<NllLossBackward>)


 49%|████▉     | 41/83 [14:06<14:27, 20.66s/it][A

loss: tensor(0.5755, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:27<14:07, 20.66s/it][A

loss: 


 52%|█████▏    | 43/83 [14:48<13:46, 20.65s/it][A

tensor(0.6721, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:08<13:26, 20.67s/it][A

tensor(0.6472, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:29<13:06, 20.69s/it][A

tensor(0.6280, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:50<12:48, 20.76s/it][A

loss: tensor(0.5762, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:11<12:28, 20.78s/it][A

tensor(0.5401, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:32<12:06, 20.75s/it][A

loss: tensor(0.5519, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:52<11:45, 20.76s/it][A

tensor(0.4358, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:13<11:26, 20.81s/it][A

tensor(0.7469, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:34<11:05, 20.81s/it][A

tensor(0.7501, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [17:55<10:44, 20.79s/it][A

loss: tensor(0.5299, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:16<10:23, 20.80s/it][A

tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:37<10:03, 20.82s/it][A

tensor(0.7329, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [18:57<09:42, 20.79s/it][A

loss: tensor(0.6283, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:18<09:20, 20.78s/it][A

tensor(0.6002, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:39<08:59, 20.77s/it][A

loss: tensor(0.5879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7113, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:00<08:38, 20.75s/it][A

loss: 


 71%|███████   | 59/83 [20:20<08:18, 20.76s/it][A

tensor(0.5366, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:41<07:57, 20.75s/it][A

tensor(0.5814, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.4982, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [21:02<07:35, 20.72s/it][A

loss: 


 75%|███████▍  | 62/83 [21:22<07:14, 20.68s/it][A

tensor(0.6363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:43<06:53, 20.68s/it][A

tensor(0.7128, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:04<06:32, 20.68s/it][A

loss: tensor(0.7779, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:24<06:11, 20.65s/it][A

tensor(0.5802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:45<05:51, 20.65s/it][A

tensor(0.5156, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:06<05:30, 20.66s/it][A

tensor(0.7264, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6623, device='cuda:0', grad_fn=<NllLossBackward>)


 82%|████████▏ | 68/83 [23:26<05:10, 20.68s/it][A

loss: 


 83%|████████▎ | 69/83 [23:47<04:49, 20.65s/it][A

tensor(0.4763, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:07<04:27, 20.62s/it][A

loss: tensor(0.6747, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:28<04:07, 20.65s/it][A

tensor(0.8710, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [24:49<03:47, 20.70s/it][A

loss: tensor(0.4604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:10<03:27, 20.73s/it][A

tensor(0.6148, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:30<03:06, 20.72s/it][A

loss: tensor(0.7054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:51<02:45, 20.74s/it][A

tensor(0.6043, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [26:12<02:25, 20.75s/it][A

loss: tensor(0.7245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:33<02:04, 20.81s/it][A

tensor(0.6809, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:54<01:43, 20.78s/it][A

tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:14<01:23, 20.79s/it][A

tensor(0.5428, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:35<01:02, 20.77s/it][A

loss: tensor(0.6049, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:56<00:41, 20.76s/it][A

tensor(0.5720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:17<00:20, 20.75s/it][A

tensor(0.6276, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:24<00:00, 20.53s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6980, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6090529862656651

	train acc: 0.6688629518072289

	training prec: 0.8552940205970772

	training rec: 0.6688629518072289

	training f1: 0.7178499588963225

	Current Learning rate:  0.0007714285714285715



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:08<01:44,  2.67s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.67s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.67s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.66s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.66s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.65s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.65s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.67s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.60902258398987

	Validation acc: 0.5446428571428571

	Validation prec: 0.8683880501788958

	Validation rec: 0.5446428571428571

	Validation f1: 0.6157044608678257
loss: 


  1%|          | 1/83 [00:20<28:10, 20.61s/it][A

tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:50, 20.62s/it][A

tensor(0.5460, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:01<27:26, 20.59s/it][A

loss: tensor(0.6289, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:22<27:05, 20.58s/it][A

loss: tensor(0.5489, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:43<26:49, 20.63s/it][A

loss: tensor(0.6745, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 6/83 [02:03<26:29, 20.65s/it][A

loss: tensor(0.5927, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:14, 20.71s/it][A

tensor(0.8650, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:57, 20.77s/it][A

tensor(0.7030, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:37, 20.77s/it][A

tensor(0.5294, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:27<25:19, 20.81s/it][A

tensor(0.6438, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:55, 20.77s/it][A

tensor(0.7075, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:08<24:33, 20.75s/it][A

loss: tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:12, 20.75s/it][A

tensor(0.5494, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:50<23:51, 20.75s/it][A

tensor(0.6101, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:10<23:29, 20.73s/it][A

loss: tensor(0.6058, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:31<23:11, 20.77s/it][A

loss: tensor(0.5570, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:52<22:51, 20.78s/it][A

tensor(0.5992, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:13<22:33, 20.82s/it][A

loss: tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:34<22:14, 20.85s/it][A

tensor(0.6962, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:54<21:51, 20.81s/it][A

tensor(0.6816, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:15<21:28, 20.78s/it][A

loss: tensor(0.5895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:36<21:08, 20.80s/it][A

tensor(0.6078, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:57<20:45, 20.76s/it][A

loss: tensor(0.6135, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:17<20:22, 20.72s/it][A

tensor(0.5897, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5657, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:38<20:01, 20.71s/it][A

loss: 


 31%|███▏      | 26/83 [08:59<19:38, 20.68s/it][A

tensor(0.5133, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:19<19:17, 20.68s/it][A

tensor(0.6698, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:40<18:57, 20.68s/it][A

loss: tensor(0.6008, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [10:01<18:37, 20.69s/it][A

loss: tensor(0.5672, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:21<18:17, 20.72s/it][A

tensor(0.4748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:42<17:57, 20.73s/it][A

tensor(0.5892, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:03<17:38, 20.76s/it][A

loss: tensor(0.8199, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:24<17:19, 20.79s/it][A

loss: tensor(0.7075, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:45<16:59, 20.80s/it][A

loss: tensor(0.6141, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:05<16:37, 20.77s/it][A

loss: tensor(0.5506, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:26<16:16, 20.77s/it][A

loss: tensor(0.5690, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:47<15:55, 20.78s/it][A

loss: tensor(0.7260, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:08<15:32, 20.73s/it][A

loss: tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:28<15:11, 20.71s/it][A

tensor(0.6584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:49<14:50, 20.71s/it][A

tensor(0.5008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:10<14:30, 20.71s/it][A

tensor(0.6590, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:30<14:08, 20.69s/it][A

tensor(0.5631, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:51<13:47, 20.70s/it][A

loss: tensor(0.6720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:12<13:27, 20.71s/it][A

tensor(0.5386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:33<13:08, 20.76s/it][A

tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:53<12:47, 20.75s/it][A

loss: tensor(0.5227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:14<12:27, 20.77s/it][A

tensor(0.6385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:35<12:05, 20.73s/it][A

tensor(0.6230, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:56<11:43, 20.71s/it][A

tensor(0.7383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:16<11:23, 20.71s/it][A

tensor(0.5127, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:37<11:01, 20.67s/it][A

tensor(0.4456, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:58<10:42, 20.73s/it][A

tensor(0.5976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:18<10:22, 20.75s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:39<10:03, 20.80s/it][A

tensor(0.5733, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:00<09:43, 20.84s/it][A

tensor(0.6458, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:21<09:23, 20.85s/it][A

loss: tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:42<09:02, 20.88s/it][A

tensor(0.6163, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:03<08:42, 20.89s/it][A

tensor(0.6363, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:24<08:20, 20.85s/it][A

loss: tensor(0.5533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:45<07:59, 20.83s/it][A

tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:05<07:38, 20.83s/it][A

tensor(0.5700, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:26<07:16, 20.80s/it][A

tensor(0.6073, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:47<06:55, 20.78s/it][A

tensor(0.5584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:08<06:34, 20.78s/it][A

tensor(0.6685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:28<06:14, 20.79s/it][A

tensor(0.7792, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [22:49<05:53, 20.77s/it][A

loss: tensor(0.6096, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:10<05:32, 20.80s/it]

loss: tensor(0.5635, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 82%|████████▏ | 68/83 [23:31<05:12, 20.80s/it][A

tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:52<04:51, 20.83s/it][A

tensor(0.5125, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:12<04:30, 20.79s/it][A

loss: tensor(0.5256, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6244, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:33<04:09, 20.82s/it][A
 87%|████████▋ | 72/83 [24:54<03:48, 20.81s/it]

loss: tensor(0.6771, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 88%|████████▊ | 73/83 [25:15<03:28, 20.81s/it][A

tensor(0.5406, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5541, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:36<03:07, 20.84s/it][A

loss: 


 90%|█████████ | 75/83 [25:57<02:46, 20.83s/it][A

tensor(0.6458, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:18<02:25, 20.86s/it][A

tensor(0.6401, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:38<02:05, 20.84s/it][A

tensor(0.5437, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:59<01:44, 20.85s/it][A

tensor(0.5952, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:20<01:23, 20.86s/it][A

tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:41<01:02, 20.85s/it][A

tensor(0.7647, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:02<00:41, 20.80s/it][A

tensor(0.5378, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:22<00:20, 20.78s/it][A

loss: tensor(0.6027, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:29<00:00, 20.60s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6382, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6134506155209369

	train acc: 0.6596214403066812

	training prec: 0.853928516553692

	training rec: 0.6596214403066812

	training f1: 0.7154177926155214

	Current Learning rate:  0.0007428571428571429



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.68s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.65s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.65s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.64s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.64s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.5910422113679704

	Validation acc: 0.6145833333333334

	Validation prec: 0.8644326406756797

	Validation rec: 0.6145833333333334

	Validation f1: 0.6806950045472127



  1%|          | 1/83 [00:20<28:40, 20.98s/it][A

loss: tensor(0.6540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:09, 20.86s/it][A

tensor(0.5755, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:49, 20.86s/it][A

tensor(0.5370, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:23<27:18, 20.74s/it][A

loss: tensor(0.5039, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:43<26:51, 20.66s/it][A

loss: tensor(0.5342, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:29, 20.64s/it][A

tensor(0.6535, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:04, 20.58s/it][A

tensor(0.4889, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:45<25:40, 20.54s/it][A

loss: tensor(0.5658, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:05<25:18, 20.51s/it][A

tensor(0.7507, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:26<25:01, 20.56s/it][A

tensor(0.5606, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:47<24:42, 20.60s/it][A

loss: tensor(0.5050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:07<24:20, 20.57s/it][A

tensor(0.6319, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:28<24:00, 20.58s/it][A

loss: tensor(0.6978, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:48<23:36, 20.53s/it][A

loss: tensor(0.4384, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:09<23:15, 20.53s/it][A

loss: tensor(0.5220, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:29<22:54, 20.51s/it][A

loss: tensor(0.6363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:50<22:37, 20.56s/it][A

tensor(0.5271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:10<22:14, 20.53s/it][A

tensor(0.7310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:31<21:54, 20.54s/it][A

tensor(0.7388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:51<21:35, 20.56s/it][A

tensor(0.4808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:12<21:16, 20.59s/it][A

tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:33<20:54, 20.57s/it][A

tensor(0.6344, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:53<20:34, 20.57s/it][A

tensor(0.5644, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:14<20:16, 20.62s/it][A

tensor(0.5117, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:35<19:59, 20.68s/it][A

tensor(0.5701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:55<19:39, 20.69s/it][A

tensor(0.6519, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:16<19:17, 20.67s/it][A

tensor(0.8858, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:37<18:57, 20.68s/it]

loss: tensor(0.5006, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 35%|███▍      | 29/83 [09:57<18:35, 20.66s/it][A

tensor(0.5956, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:18<18:15, 20.67s/it][A

tensor(0.6051, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7735, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:39<17:54, 20.67s/it][A

loss: 


 39%|███▊      | 32/83 [10:59<17:32, 20.65s/it][A

tensor(0.5611, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:20<17:12, 20.64s/it][A

tensor(0.5244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:41<16:51, 20.65s/it][A

tensor(0.6026, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:01<16:29, 20.62s/it]

loss: tensor(0.6026, device='cuda:0', grad_fn=<NllLossBackward>)


[A
 43%|████▎     | 36/83 [12:22<16:09, 20.63s/it][A

loss: tensor(0.6147, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:42<15:48, 20.63s/it][A

tensor(0.6778, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:03<15:28, 20.63s/it][A

tensor(0.5790, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:24<15:07, 20.63s/it][A

tensor(0.7777, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:44<14:47, 20.65s/it][A

tensor(0.6377, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:05<14:26, 20.63s/it][A

tensor(0.6881, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:25<14:04, 20.61s/it][A

tensor(0.5649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:46<13:44, 20.62s/it][A

tensor(0.5625, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:07<13:23, 20.60s/it][A

loss: tensor(0.5973, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:27<13:01, 20.58s/it][A

tensor(0.5495, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:48<12:41, 20.58s/it][A

loss: tensor(0.5786, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:08<12:21, 20.59s/it][A

tensor(0.5506, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:29<12:00, 20.59s/it][A

tensor(0.4766, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [16:50<11:39, 20.58s/it][A

loss: tensor(0.5661, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:10<11:19, 20.59s/it][A

tensor(0.5804, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:31<10:58, 20.57s/it][A

loss: tensor(0.5498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:51<10:37, 20.56s/it][A

tensor(0.5030, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:12<10:16, 20.55s/it][A

tensor(0.5147, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:32<09:55, 20.53s/it][A

loss: tensor(0.5515, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6969, device='cuda:0', grad_fn=<NllLossBackward>)


 66%|██████▋   | 55/83 [18:53<09:34, 20.50s/it][A

loss: 


 67%|██████▋   | 56/83 [19:13<09:14, 20.55s/it][A

tensor(0.6984, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:34<08:57, 20.66s/it][A

tensor(0.5585, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:55<08:37, 20.69s/it][A

tensor(0.5814, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:16<08:15, 20.64s/it][A

loss: tensor(0.6554, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:36<07:54, 20.63s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6012, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [20:57<07:33, 20.60s/it][A
 75%|███████▍  | 62/83 [21:17<07:12, 20.61s/it][A

loss: tensor(0.6412, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:38<06:52, 20.60s/it][A

tensor(0.5913, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:58<06:30, 20.58s/it][A

tensor(0.5498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:19<06:10, 20.59s/it][A

tensor(0.6944, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:40<05:49, 20.56s/it][A

tensor(0.5658, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:00<05:28, 20.56s/it][A

tensor(0.5847, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:21<05:08, 20.57s/it][A

loss: tensor(0.5661, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:41<04:48, 20.58s/it][A

tensor(0.6679, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:02<04:27, 20.57s/it][A

tensor(0.5659, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:22<04:06, 20.54s/it][A

loss: tensor(0.5395, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [24:43<03:45, 20.52s/it][A

loss: tensor(0.5831, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:03<03:25, 20.50s/it][A

loss: tensor(0.7347, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:24<03:04, 20.53s/it][A

loss: tensor(0.7985, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:45<02:45, 20.65s/it][A

tensor(0.6152, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5305, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [26:05<02:24, 20.64s/it][A

loss: 


 93%|█████████▎| 77/83 [26:26<02:04, 20.68s/it][A

tensor(0.6084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:47<01:43, 20.76s/it][A

tensor(0.5642, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:08<01:23, 20.79s/it][A

tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:29<01:02, 20.83s/it][A

tensor(0.5797, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [27:50<00:41, 20.86s/it][A

loss: tensor(0.5591, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:11<00:20, 20.87s/it][A

tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:18<00:00, 20.46s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6445, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6016082752899952

	train acc: 0.6684522179627601

	training prec: 0.8518604308565744

	training rec: 0.6684522179627601

	training f1: 0.7229551333587904

	Current Learning rate:  0.0007142857142857143



  2%|▏         | 1/42 [00:02<01:47,  2.62s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:28,  2.67s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.66s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:37<01:13,  2.63s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.64s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.64s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.6123887399832407

	Validation acc: 0.5669642857142857

	Validation prec: 0.8639347290374559

	Validation rec: 0.5669642857142857

	Validation f1: 0.6380176852603535
loss: 


  1%|          | 1/83 [00:20<28:01, 20.51s/it][A

tensor(0.6321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:47, 20.59s/it][A

tensor(0.5469, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:01<27:26, 20.58s/it][A

loss: tensor(0.5650, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.4975, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:22<27:08, 20.62s/it][A
  6%|▌         | 5/83 [01:42<26:42, 20.55s/it][A

loss: tensor(0.6940, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:03<26:18, 20.50s/it][A

tensor(0.5437, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:23<25:56, 20.48s/it][A

tensor(0.5593, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:44<25:37, 20.49s/it][A

loss: tensor(0.6250, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:04<25:16, 20.50s/it][A

loss: tensor(0.5243, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:25<25:03, 20.60s/it][A

tensor(0.5349, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:46<24:42, 20.59s/it][A

tensor(0.4827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:06<24:25, 20.63s/it][A

tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:27<24:09, 20.71s/it][A

tensor(0.7361, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:48<23:51, 20.74s/it][A

tensor(0.5533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:09<23:33, 20.78s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:30<23:11, 20.76s/it][A

loss: tensor(0.5109, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:50<22:50, 20.76s/it][A

tensor(0.5888, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:11<22:25, 20.71s/it][A

loss: tensor(0.7882, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:32<22:03, 20.67s/it][A

tensor(0.5933, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6406, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:52<21:39, 20.63s/it][A

loss: 


 25%|██▌       | 21/83 [07:13<21:18, 20.62s/it][A

tensor(0.6671, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:33<20:58, 20.63s/it][A

tensor(0.6384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:54<20:34, 20.58s/it][A

tensor(0.5109, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:14<20:13, 20.58s/it][A

tensor(0.5062, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5041, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:35<19:53, 20.58s/it][A

loss: 


 31%|███▏      | 26/83 [08:56<19:33, 20.59s/it][A

tensor(0.5754, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:16<19:13, 20.60s/it][A

tensor(0.5754, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:37<18:56, 20.66s/it][A

tensor(0.4945, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:58<18:39, 20.73s/it][A

tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:19<18:18, 20.73s/it][A

tensor(0.5308, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:39<17:59, 20.75s/it][A

tensor(0.4757, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:00<17:36, 20.72s/it][A

tensor(0.5544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:21<17:13, 20.67s/it][A

tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:41<16:51, 20.65s/it][A

tensor(0.6334, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:02<16:30, 20.63s/it][A

tensor(0.5166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:23<16:10, 20.65s/it][A

tensor(0.5174, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:43<15:50, 20.67s/it][A

tensor(0.6321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:04<15:30, 20.67s/it][A

tensor(0.7016, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6186, device='cuda:0', grad_fn=<NllLossBackward>)


 47%|████▋     | 39/83 [13:25<15:12, 20.73s/it][A

loss: 


 48%|████▊     | 40/83 [13:45<14:50, 20.70s/it][A

tensor(0.6427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:06<14:28, 20.68s/it][A

tensor(0.5878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:27<14:07, 20.67s/it][A

tensor(0.8099, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:47<13:47, 20.69s/it][A

loss: tensor(0.7381, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:08<13:27, 20.71s/it][A

tensor(0.6808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:29<13:06, 20.70s/it][A

tensor(0.5423, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:50<12:46, 20.71s/it]

loss: tensor(0.6723, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 57%|█████▋    | 47/83 [16:10<12:25, 20.72s/it][A

tensor(0.6687, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:31<12:03, 20.67s/it][A

tensor(0.6494, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [16:51<11:40, 20.62s/it][A

loss: 


 60%|██████    | 50/83 [17:12<11:20, 20.61s/it][A

tensor(0.6795, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:33<10:59, 20.62s/it][A

tensor(0.6199, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [17:53<10:38, 20.60s/it][A

loss: tensor(0.5788, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:14<10:16, 20.56s/it][A

loss: tensor(0.6314, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:34<09:56, 20.57s/it][A

tensor(0.5100, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [18:55<09:35, 20.54s/it][A

loss: tensor(0.6249, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:15<09:13, 20.52s/it][A

loss: tensor(0.4803, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:36<08:53, 20.50s/it][A

loss: tensor(0.5453, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:56<08:33, 20.54s/it][A

tensor(0.6001, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:17<08:12, 20.53s/it][A

loss: tensor(0.7353, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:37<07:52, 20.54s/it][A

tensor(0.5960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:58<07:32, 20.58s/it][A

tensor(0.7176, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:19<07:11, 20.57s/it][A

loss: tensor(0.5993, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:39<06:51, 20.56s/it][A

tensor(0.6656, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:00<06:30, 20.55s/it][A

tensor(0.5742, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:20<06:10, 20.57s/it][A

tensor(0.7536, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:41<05:49, 20.57s/it][A

tensor(0.4883, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:01<05:29, 20.57s/it][A

tensor(0.7260, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:22<05:08, 20.56s/it][A

loss: tensor(0.5392, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:42<04:47, 20.56s/it][A

tensor(0.6972, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:03<04:27, 20.55s/it][A

tensor(0.6147, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:24<04:06, 20.56s/it][A

tensor(0.7114, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:44<03:46, 20.55s/it][A

tensor(0.5554, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:05<03:25, 20.56s/it][A

tensor(0.5284, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:25<03:05, 20.60s/it][A

tensor(0.6813, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:46<02:44, 20.62s/it][A

tensor(0.5503, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:07<02:24, 20.63s/it][A

tensor(0.5357, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:28<02:04, 20.69s/it][A

tensor(0.5597, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:48<01:43, 20.74s/it][A

tensor(0.5286, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:09<01:22, 20.74s/it][A

tensor(0.7161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:30<01:02, 20.71s/it][A

tensor(0.4596, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [27:51<00:41, 20.78s/it][A

loss: tensor(0.7162, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:11<00:20, 20.78s/it][A

tensor(0.5695, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:19<00:00, 20.47s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4600, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6033369317112198

	train acc: 0.6699240142387732

	training prec: 0.8544213474034459

	training rec: 0.6699240142387732

	training f1: 0.7234806205003572

	Current Learning rate:  0.0006857142857142857



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:44,  2.67s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.68s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:16<01:36,  2.67s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.68s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.66s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.68s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.67s/it][A
 36%|███▌      | 15/42 [00:39<01:12,  2.67s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.67s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.67s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.5933202086460023

	Validation acc: 0.6453373015873016

	Validation prec: 0.8640278834879724

	Validation rec: 0.6453373015873016

	Validation f1: 0.7081774413723999
loss: 


  1%|          | 1/83 [00:20<28:16, 20.69s/it][A

tensor(0.6070, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:00, 20.75s/it][A

tensor(0.6823, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:37, 20.72s/it][A

tensor(0.5732, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:15, 20.71s/it][A

tensor(0.5549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<27:00, 20.78s/it][A

tensor(0.7017, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 6/83 [02:04<26:43, 20.82s/it][A

loss: tensor(0.6151, device='cuda:0', grad_fn=<NllLossBackward>)



  8%|▊         | 7/83 [02:25<26:19, 20.78s/it][A

loss: tensor(0.5285, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:53, 20.71s/it][A

tensor(0.4894, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:06<25:32, 20.71s/it][A

loss: tensor(0.5629, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:27<25:09, 20.68s/it][A

loss: tensor(0.6545, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:47<24:46, 20.64s/it][A

loss: tensor(0.7063, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:08<24:25, 20.63s/it][A

loss: tensor(0.5208, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:29<24:05, 20.65s/it]

loss: tensor(0.4713, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 17%|█▋        | 14/83 [04:49<23:47, 20.69s/it][A

tensor(0.6807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:27, 20.69s/it][A

tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:31<23:07, 20.71s/it][A

tensor(0.6444, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:52<22:47, 20.72s/it][A

tensor(0.5237, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:25, 20.70s/it][A

tensor(0.6119, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:33<22:05, 20.71s/it][A

loss: tensor(0.6486, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:54<21:43, 20.70s/it][A

tensor(0.5602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:14<21:24, 20.71s/it][A

tensor(0.5369, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:35<21:05, 20.75s/it][A

tensor(0.6061, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:56<20:44, 20.74s/it][A

tensor(0.6001, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:17<20:22, 20.71s/it][A

tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:37<20:00, 20.70s/it][A

tensor(0.5387, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:58<19:37, 20.66s/it][A

tensor(0.6239, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:18<19:16, 20.65s/it][A

loss: tensor(0.6034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:39<18:54, 20.63s/it][A

tensor(0.7077, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:00<18:34, 20.64s/it][A

tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:20<18:14, 20.66s/it][A

tensor(0.5612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:41<17:54, 20.66s/it][A

tensor(0.5036, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6133, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:02<17:33, 20.66s/it][A

loss: 


 40%|███▉      | 33/83 [11:22<17:12, 20.65s/it][A

tensor(0.6075, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:43<16:52, 20.66s/it][A

tensor(0.5161, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:04<16:31, 20.66s/it][A

tensor(0.5018, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:24<16:10, 20.64s/it][A

tensor(0.5677, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6771, device='cuda:0', grad_fn=<NllLossBackward>)


 45%|████▍     | 37/83 [12:45<15:48, 20.62s/it][A

loss: 


 46%|████▌     | 38/83 [13:05<15:27, 20.61s/it][A

tensor(0.6854, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:26<15:05, 20.58s/it][A

loss: tensor(0.5247, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:47<14:44, 20.57s/it][A

tensor(0.5413, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5422, device='cuda:0', grad_fn=<NllLossBackward>)


 49%|████▉     | 41/83 [14:07<14:23, 20.56s/it][A
 51%|█████     | 42/83 [14:28<14:01, 20.53s/it][A

loss: tensor(0.6172, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:48<13:42, 20.57s/it][A

tensor(0.4952, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:09<13:21, 20.56s/it][A

tensor(0.5957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:29<13:01, 20.56s/it][A

tensor(0.6228, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:50<12:41, 20.57s/it][A

tensor(0.6725, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:10<12:20, 20.57s/it][A

loss: tensor(0.5955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:31<12:00, 20.58s/it][A

tensor(0.6247, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:52<11:42, 20.67s/it][A

tensor(0.6130, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:13<11:21, 20.65s/it][A

tensor(0.6608, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:33<11:00, 20.63s/it][A

tensor(0.5892, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7893, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [17:54<10:39, 20.62s/it][A
 64%|██████▍   | 53/83 [18:14<10:19, 20.63s/it][A

loss: tensor(0.6191, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:35<09:57, 20.60s/it][A

loss: tensor(0.5632, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [18:55<09:36, 20.58s/it][A

loss: tensor(0.4817, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:16<09:15, 20.57s/it][A

loss: tensor(0.5503, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:36<08:54, 20.56s/it][A

loss: tensor(0.6637, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [19:58<08:40, 20.84s/it][A

loss: tensor(0.6157, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:20<08:25, 21.08s/it][A

loss: tensor(0.5651, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:41<08:05, 21.09s/it][A

tensor(0.5177, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:02<07:48, 21.28s/it][A

tensor(0.5802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:23<07:25, 21.19s/it][A

tensor(0.6942, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:44<07:00, 21.04s/it][A

tensor(0.5258, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:05<06:37, 20.94s/it][A

tensor(0.5115, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:26<06:15, 20.86s/it][A

tensor(0.5549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:46<05:53, 20.80s/it][A

tensor(0.5851, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:07<05:32, 20.75s/it][A

tensor(0.7017, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:27<05:10, 20.72s/it][A

tensor(0.6595, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [23:48<04:50, 20.72s/it][A

loss: tensor(0.5119, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:09<04:29, 20.70s/it][A

loss: tensor(0.5308, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6037, device='cuda:0', grad_fn=<NllLossBackward>)


 86%|████████▌ | 71/83 [24:30<04:09, 20.75s/it][A
 87%|████████▋ | 72/83 [24:51<03:48, 20.77s/it][A

loss: tensor(0.5743, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:11<03:27, 20.74s/it][A

loss: tensor(0.5811, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:32<03:06, 20.71s/it][A

loss: tensor(0.6114, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:52<02:45, 20.69s/it][A

tensor(0.5606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:13<02:25, 20.74s/it][A

tensor(0.6160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:34<02:04, 20.76s/it][A

tensor(0.6035, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [26:55<01:43, 20.76s/it][A

loss: tensor(0.6674, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:16<01:22, 20.73s/it][A

loss: tensor(0.7145, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:36<01:02, 20.74s/it][A

tensor(0.6674, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:57<00:41, 20.73s/it][A

tensor(0.7290, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:18<00:20, 20.72s/it][A

loss: tensor(0.5591, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:25<00:00, 20.55s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7460, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6000298179057707

	train acc: 0.6603402245345016

	training prec: 0.8549714272915375

	training rec: 0.6603402245345016

	training f1: 0.7160240169381071

	Current Learning rate:  0.0006571428571428571



  2%|▏         | 1/42 [00:02<01:51,  2.71s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.69s/it][A
  7%|▋         | 3/42 [00:08<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.67s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.66s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.67s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.67s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.65s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.65s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.65s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6046041079929897

	Validation acc: 0.5964781746031745

	Validation prec: 0.8659714851600528

	Validation rec: 0.5964781746031745

	Validation f1: 0.6686939596955407
loss: 


  1%|          | 1/83 [00:20<28:33, 20.89s/it][A

tensor(0.4372, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:06, 20.82s/it][A

tensor(0.5002, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:42, 20.78s/it][A

tensor(0.5478, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:23<27:18, 20.74s/it][A

loss: tensor(0.7337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:57, 20.73s/it][A

tensor(0.6373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:35, 20.72s/it][A

tensor(0.5380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:14, 20.72s/it][A

tensor(0.5862, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:53, 20.71s/it][A

tensor(0.6656, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:06<25:33, 20.72s/it][A

loss: tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:27<25:11, 20.70s/it][A

loss: tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:48, 20.67s/it][A

tensor(0.6719, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:08<24:26, 20.66s/it][A

loss: tensor(0.5143, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:06, 20.67s/it][A

tensor(0.6844, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:49<23:46, 20.68s/it][A

loss: tensor(0.9009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:29, 20.72s/it][A

tensor(0.6411, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:31<23:09, 20.74s/it][A

tensor(0.5891, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:52<22:48, 20.74s/it][A

tensor(0.5801, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:13<22:29, 20.76s/it][A

tensor(0.5552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:33<22:07, 20.74s/it][A

tensor(0.5841, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:54<21:43, 20.70s/it][A

tensor(0.7201, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:14<21:20, 20.66s/it][A

tensor(0.6380, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:35<20:57, 20.61s/it][A

loss: tensor(0.5516, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:56<20:36, 20.61s/it][A

tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:16<20:17, 20.63s/it][A

tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:37<19:56, 20.64s/it][A

tensor(0.5962, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:57<19:35, 20.62s/it][A

tensor(0.7210, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:18<19:14, 20.62s/it][A

tensor(0.5910, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:39<18:53, 20.60s/it][A

tensor(0.5378, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:59<18:33, 20.62s/it][A

tensor(0.4386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:20<18:12, 20.61s/it][A

tensor(0.5937, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:40<17:51, 20.61s/it][A

loss: tensor(0.6002, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:01<17:31, 20.62s/it][A

loss: tensor(0.5501, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:22<17:11, 20.62s/it][A

loss: tensor(0.7167, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:42<16:50, 20.62s/it][A

tensor(0.7140, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:03<16:27, 20.57s/it][A

tensor(0.5342, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:23<16:06, 20.57s/it][A

tensor(0.6698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:44<15:45, 20.55s/it][A

tensor(0.5184, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:04<15:24, 20.55s/it][A

tensor(0.5446, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:25<15:03, 20.53s/it][A

loss: tensor(0.5804, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:46<14:44, 20.56s/it][A

tensor(0.6841, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:06<14:22, 20.54s/it][A

loss: tensor(0.4370, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:27<14:04, 20.61s/it][A

tensor(0.5289, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:48<13:45, 20.65s/it][A

tensor(0.5514, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6355, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:08<13:25, 20.65s/it][A
 54%|█████▍    | 45/83 [15:29<13:05, 20.67s/it][A

loss: tensor(0.5771, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:50<12:45, 20.69s/it][A

tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:10<12:25, 20.70s/it][A

tensor(0.5719, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:31<12:05, 20.74s/it][A

tensor(0.5581, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [16:52<11:45, 20.74s/it][A

loss: tensor(0.5079, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:13<11:23, 20.70s/it][A

loss: tensor(0.5976, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:33<11:02, 20.70s/it][A

loss: tensor(0.6122, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [17:54<10:40, 20.67s/it][A

loss: tensor(0.5250, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:14<10:18, 20.63s/it][A

loss: tensor(0.5461, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:35<09:58, 20.62s/it][A

loss: tensor(0.8496, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [18:56<09:36, 20.60s/it][A

loss: tensor(0.6541, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:16<09:16, 20.60s/it][A

loss: tensor(0.6902, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:37<08:55, 20.61s/it][A

loss: tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [19:57<08:34, 20.60s/it][A

loss: tensor(0.5218, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:18<08:16, 20.67s/it][A

tensor(0.5947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:39<07:56, 20.72s/it][A

tensor(0.5485, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:00<07:37, 20.79s/it][A

tensor(0.5898, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:21<07:17, 20.82s/it][A

tensor(0.6729, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:42<06:56, 20.82s/it][A

tensor(0.5666, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:03<06:36, 20.87s/it][A

tensor(0.7588, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6138, device='cuda:0', grad_fn=<NllLossBackward>)


 78%|███████▊  | 65/83 [22:23<06:14, 20.83s/it][A
 80%|███████▉  | 66/83 [22:44<05:53, 20.78s/it][A

loss: tensor(0.5698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:05<05:32, 20.77s/it][A

tensor(0.4540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:25<05:10, 20.73s/it][A

tensor(0.5621, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5493, device='cuda:0', grad_fn=<NllLossBackward>)


 83%|████████▎ | 69/83 [23:46<04:49, 20.70s/it][A
 84%|████████▍ | 70/83 [24:07<04:28, 20.68s/it][A

loss: tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.4791, device='cuda:0', grad_fn=<NllLossBackward>)


 86%|████████▌ | 71/83 [24:27<04:08, 20.68s/it][A

loss: 


 87%|████████▋ | 72/83 [24:48<03:48, 20.76s/it][A

tensor(0.7079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:09<03:27, 20.80s/it][A

tensor(0.6192, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:30<03:07, 20.82s/it][A

tensor(0.7453, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [25:51<02:46, 20.85s/it][A

loss: tensor(0.6367, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:12<02:25, 20.84s/it][A

tensor(0.5837, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [26:33<02:05, 20.86s/it][A

loss: tensor(0.5838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:54<01:44, 20.89s/it][A

tensor(0.6351, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:15<01:23, 20.87s/it][A

tensor(0.6057, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:35<01:02, 20.84s/it][A

loss: tensor(0.6716, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:56<00:41, 20.78s/it][A

tensor(0.5205, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:17<00:20, 20.76s/it][A

tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:24<00:00, 20.53s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5732, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6050343919231231

	train acc: 0.6532892935377875

	training prec: 0.8551848821335488

	training rec: 0.6532892935377875

	training f1: 0.7076475006438504

	Current Learning rate:  0.0006285714285714285



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:08<01:44,  2.68s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.67s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.66s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.66s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:20,  2.68s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.66s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.67s/it][A
 36%|███▌      | 15/42 [00:39<01:12,  2.67s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.66s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:04,  2.67s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.66s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6453299564974648

	Validation acc: 0.8137400793650794

	Validation prec: 0.8309550665404517

	Validation rec: 0.8137400793650794

	Validation f1: 0.8199685590189089
loss: 


  1%|          | 1/83 [00:20<28:13, 20.66s/it][A

tensor(0.5501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:55, 20.69s/it][A

tensor(0.5351, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:33, 20.66s/it][A

tensor(0.5270, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:16, 20.71s/it][A

tensor(0.5791, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:55, 20.71s/it][A

tensor(0.6692, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:33, 20.69s/it][A

tensor(0.6160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:11, 20.68s/it][A

tensor(0.5821, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:51, 20.69s/it][A

tensor(0.5840, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:36, 20.76s/it][A

tensor(0.5234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:27<25:15, 20.75s/it][A

tensor(0.5950, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:54, 20.76s/it][A

tensor(0.6059, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:08<24:32, 20.74s/it][A

loss: tensor(0.6086, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:12, 20.75s/it][A

tensor(0.5989, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:50<23:51, 20.74s/it][A

tensor(0.5667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:32, 20.78s/it][A

tensor(0.6049, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:31<23:13, 20.80s/it][A

tensor(0.6425, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:52<22:55, 20.84s/it][A

loss: tensor(0.5500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:13<22:33, 20.83s/it][A

tensor(0.5040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:34<22:09, 20.77s/it][A

tensor(0.6175, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:54<21:47, 20.75s/it][A

tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:15<21:24, 20.71s/it][A

tensor(0.5141, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:36<21:02, 20.70s/it][A

tensor(0.5858, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:56<20:42, 20.71s/it][A

loss: tensor(0.5282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:17<20:21, 20.71s/it][A

tensor(0.4580, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:38<20:02, 20.73s/it][A

loss: tensor(0.5589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:59<19:42, 20.74s/it][A

tensor(0.5272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:19<19:20, 20.73s/it][A

tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:40<18:58, 20.70s/it][A

tensor(0.5266, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:01<18:37, 20.70s/it][A

tensor(0.6429, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:22<18:19, 20.75s/it][A

tensor(0.6415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:43<18:02, 20.81s/it][A

tensor(0.5658, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:03<17:41, 20.82s/it][A

loss: tensor(0.6419, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:24<17:19, 20.78s/it][A

tensor(0.6789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:45<16:56, 20.75s/it][A

tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:05<16:34, 20.72s/it][A

tensor(0.7014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:26<16:13, 20.72s/it][A

tensor(0.6037, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:47<15:52, 20.71s/it][A

tensor(0.6530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:07<15:30, 20.69s/it][A

tensor(0.5402, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:28<15:12, 20.73s/it][A

tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:49<14:51, 20.72s/it][A

tensor(0.6998, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:10<14:32, 20.76s/it][A

tensor(0.7027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:31<14:11, 20.76s/it][A

tensor(0.5642, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:51<13:49, 20.74s/it][A

tensor(0.5838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:12<13:26, 20.67s/it][A

tensor(0.7300, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7863, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:32<13:04, 20.64s/it][A

loss: 


 55%|█████▌    | 46/83 [15:53<12:44, 20.66s/it][A

tensor(0.6146, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6097, device='cuda:0', grad_fn=<NllLossBackward>)


 57%|█████▋    | 47/83 [16:14<12:22, 20.63s/it][A

loss: 


 58%|█████▊    | 48/83 [16:34<12:01, 20.61s/it][A

tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:55<11:40, 20.59s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:15<11:19, 20.60s/it][A

tensor(0.5438, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:36<11:00, 20.64s/it][A

loss: tensor(0.6494, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:57<10:39, 20.64s/it][A

tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:17<10:18, 20.61s/it][A

tensor(0.6718, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:38<09:57, 20.60s/it][A

loss: tensor(0.6084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:58<09:37, 20.61s/it][A

tensor(0.6060, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:19<09:17, 20.64s/it][A

loss: tensor(0.6332, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:40<08:57, 20.68s/it][A

tensor(0.5961, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:01<08:36, 20.67s/it][A

tensor(0.6037, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:21<08:15, 20.63s/it]

loss: tensor(0.5245, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 72%|███████▏  | 60/83 [20:42<07:54, 20.63s/it][A

tensor(0.6572, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:03<07:35, 20.69s/it][A

tensor(0.5845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5723, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:23<07:14, 20.70s/it][A
 76%|███████▌  | 63/83 [21:44<06:53, 20.69s/it][A

loss: tensor(0.5580, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:05<06:33, 20.73s/it][A

tensor(0.5270, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:26<06:13, 20.76s/it][A

tensor(0.5364, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:46<05:52, 20.74s/it][A

tensor(0.5351, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:07<05:31, 20.71s/it][A

tensor(0.5987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:28<05:10, 20.69s/it][A

tensor(0.5602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:48<04:49, 20.68s/it][A

tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:09<04:28, 20.68s/it][A

tensor(0.5467, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:30<04:08, 20.68s/it][A

tensor(0.6852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:50<03:47, 20.66s/it][A

tensor(0.6826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:11<03:26, 20.66s/it][A

tensor(0.7206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:32<03:05, 20.66s/it][A

tensor(0.4946, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:52<02:45, 20.66s/it][A

tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:13<02:24, 20.67s/it][A

tensor(0.6796, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:34<02:04, 20.67s/it][A

tensor(0.5928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:54<01:43, 20.66s/it][A

tensor(0.5182, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:15<01:22, 20.66s/it][A

tensor(0.6326, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:36<01:01, 20.67s/it][A

tensor(0.6703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:56<00:41, 20.66s/it][A

tensor(0.6453, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:17<00:20, 20.67s/it][A

tensor(0.7081, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:24<00:00, 20.54s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4633, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6009296598922775

	train acc: 0.6713958105147865

	training prec: 0.8557222229977949

	training rec: 0.6713958105147865

	training f1: 0.724913001063099

	Current Learning rate:  0.0006



  2%|▏         | 1/42 [00:02<01:49,  2.67s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.65s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.62s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.66s/it][A
 21%|██▏       | 9/42 [00:23<01:28,  2.69s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.69s/it][A
 26%|██▌       | 11/42 [00:29<01:23,  2.70s/it][A
 29%|██▊       | 12/42 [00:31<01:20,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:18,  2.69s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.67s/it][A
 36%|███▌      | 15/42 [00:40<01:12,  2.67s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.67s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.65s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.66s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.66s/it][A
 48%|████▊     | 20/42 [00:53<00:57,  2


	Validation loss: 0.6002603336459115

	Validation acc: 0.6081349206349207

	Validation prec: 0.8565855228724774

	Validation rec: 0.6081349206349207

	Validation f1: 0.6741648618534553
loss: 


  1%|          | 1/83 [00:20<28:11, 20.63s/it][A

tensor(0.5592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5693, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:41<27:57, 20.71s/it][A

loss: 


  4%|▎         | 3/83 [01:02<27:36, 20.71s/it][A

tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:22<27:18, 20.74s/it][A

loss: tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:43<26:54, 20.70s/it][A

loss: tensor(0.6298, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:31, 20.67s/it][A

tensor(0.5272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:11, 20.68s/it][A

tensor(0.5598, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:52, 20.69s/it][A

tensor(0.6693, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:06<25:34, 20.74s/it][A

loss: tensor(0.5924, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:26<25:10, 20.69s/it][A

loss: tensor(0.6386, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:47<24:52, 20.73s/it][A

loss: tensor(0.5706, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:08<24:27, 20.67s/it][A

loss: tensor(0.6123, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:08, 20.69s/it][A

tensor(0.6008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:49<23:48, 20.71s/it][A

tensor(0.6050, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:10<23:27, 20.70s/it][A

loss: tensor(0.5655, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:31<23:07, 20.70s/it][A

loss: tensor(0.5307, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:51<22:45, 20.68s/it][A

loss: tensor(0.6641, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:12<22:22, 20.65s/it][A

loss: tensor(0.5433, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:33<22:00, 20.64s/it][A

loss: tensor(0.6201, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:53<21:40, 20.65s/it][A

loss: tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:14<21:20, 20.65s/it][A

loss: tensor(0.4714, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:34<20:59, 20.64s/it][A

loss: tensor(0.4756, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:55<20:39, 20.67s/it][A

tensor(0.5800, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:16<20:16, 20.62s/it][A

tensor(0.5103, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:36<19:57, 20.64s/it][A

tensor(0.5801, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.4950, device='cuda:0', grad_fn=<NllLossBackward>)


 31%|███▏      | 26/83 [08:57<19:34, 20.60s/it][A

loss: 


 33%|███▎      | 27/83 [09:18<19:13, 20.60s/it][A

tensor(0.5878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:38<18:53, 20.61s/it][A

tensor(0.5107, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:59<18:33, 20.61s/it][A

tensor(0.6857, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:19<18:12, 20.61s/it][A

tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:40<17:51, 20.60s/it][A

tensor(0.7675, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:01<17:31, 20.62s/it][A

tensor(0.7890, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:21<17:10, 20.62s/it][A

tensor(0.5058, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:42<16:49, 20.59s/it][A

loss: tensor(0.6537, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:02<16:27, 20.58s/it][A

tensor(0.5243, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:23<16:08, 20.61s/it][A

tensor(0.5390, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:44<15:47, 20.59s/it][A

loss: tensor(0.6530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:04<15:26, 20.59s/it][A

tensor(0.4865, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.8817, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:25<15:05, 20.57s/it][A
 48%|████▊     | 40/83 [13:45<14:43, 20.54s/it][A

loss: tensor(0.5952, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:06<14:23, 20.56s/it][A

tensor(0.5857, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:26<14:01, 20.53s/it][A

tensor(0.6415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:47<13:42, 20.56s/it][A

tensor(0.5181, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:07<13:20, 20.54s/it][A

tensor(0.7731, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:28<13:01, 20.56s/it][A

tensor(0.6066, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:48<12:40, 20.55s/it][A

tensor(0.5546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:09<12:20, 20.56s/it][A

tensor(0.5992, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:30<11:59, 20.57s/it]

loss: tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 59%|█████▉    | 49/83 [16:50<11:39, 20.57s/it][A

tensor(0.4470, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:11<11:18, 20.55s/it][A

tensor(0.6671, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:31<10:57, 20.54s/it][A

tensor(0.4645, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:52<10:36, 20.53s/it][A

tensor(0.5199, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:12<10:16, 20.53s/it]

loss: tensor(0.6184, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 65%|██████▌   | 54/83 [18:33<09:54, 20.49s/it][A

tensor(0.6502, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:53<09:33, 20.50s/it][A

tensor(0.5867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:14<09:14, 20.54s/it][A

tensor(0.5572, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:34<08:54, 20.55s/it]

loss: tensor(0.6086, device='cuda:0', grad_fn=<NllLossBackward>)


[A
 70%|██████▉   | 58/83 [19:55<08:32, 20.52s/it][A

loss: tensor(0.7220, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:15<08:12, 20.53s/it][A

tensor(0.6972, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:36<07:52, 20.52s/it][A

tensor(0.5522, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:57<07:32, 20.56s/it][A

tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:17<07:12, 20.58s/it][A

tensor(0.5530, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:38<06:51, 20.56s/it][A

tensor(0.6753, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [21:58<06:29, 20.52s/it][A

loss: tensor(0.6049, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:19<06:09, 20.51s/it][A

loss: tensor(0.5909, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [22:39<05:48, 20.50s/it][A

loss: tensor(0.4761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:00<05:29, 20.57s/it][A

tensor(0.5367, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:20<05:08, 20.59s/it][A

tensor(0.5578, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:41<04:47, 20.56s/it][A

tensor(0.6367, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:01<04:26, 20.50s/it]

loss: tensor(0.6304, device='cuda:0', grad_fn=<NllLossBackward>)


[A
 86%|████████▌ | 71/83 [24:22<04:05, 20.48s/it][A

loss: tensor(0.5569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:42<03:45, 20.50s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:03<03:25, 20.54s/it][A

tensor(0.5993, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:23<03:05, 20.56s/it][A

tensor(0.6105, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:44<02:44, 20.58s/it][A

tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:05<02:24, 20.59s/it][A

tensor(0.5062, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6102, device='cuda:0', grad_fn=<NllLossBackward>)


 93%|█████████▎| 77/83 [26:25<02:03, 20.58s/it][A

loss: 


 94%|█████████▍| 78/83 [26:46<01:42, 20.57s/it][A

tensor(0.6590, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:06<01:22, 20.55s/it][A

tensor(0.5559, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:27<01:01, 20.53s/it][A

loss: tensor(0.5436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:47<00:41, 20.56s/it][A

tensor(0.5380, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:08<00:20, 20.55s/it][A

loss: tensor(0.6267, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:15<00:00, 20.43s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5579, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.595324735325503

	train acc: 0.6710535323110625

	training prec: 0.8545831589269712

	training rec: 0.6710535323110625

	training f1: 0.7259318036176295

	Current Learning rate:  0.0005714285714285714



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.67s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.66s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.64s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.65s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.63s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.5885062661199343

	Validation acc: 0.7142857142857143

	Validation prec: 0.855243109672881

	Validation rec: 0.7142857142857143

	Validation f1: 0.7595469893507413



  1%|          | 1/83 [00:20<28:25, 20.80s/it][A

loss: tensor(0.6590, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:54, 20.68s/it][A

tensor(0.5857, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:39, 20.75s/it][A

tensor(0.5689, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:22<27:18, 20.74s/it][A

loss: tensor(0.5981, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:54, 20.70s/it][A

tensor(0.5006, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:33, 20.69s/it][A

tensor(0.4958, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5312, device='cuda:0', grad_fn=<NllLossBackward>)



  8%|▊         | 7/83 [02:24<26:12, 20.69s/it][A

loss: 


 10%|▉         | 8/83 [02:45<25:50, 20.67s/it][A

tensor(0.5564, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:29, 20.67s/it][A

tensor(0.6979, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:26<25:06, 20.64s/it][A

loss: tensor(0.4961, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:47<24:43, 20.61s/it][A

loss: tensor(0.5085, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:08<24:24, 20.62s/it][A

loss: tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:28<24:03, 20.62s/it][A

loss: tensor(0.6642, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:49<23:40, 20.58s/it][A

loss: tensor(0.5955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:09<23:22, 20.63s/it][A

tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:30<23:04, 20.67s/it][A

tensor(0.6128, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:51<22:45, 20.69s/it][A

tensor(0.6348, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:24, 20.69s/it][A
 23%|██▎       | 19/83 [06:32<22:03, 20.69s/it][A

tensor(0.5509, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:53<21:45, 20.72s/it][A

tensor(0.5071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:14<21:24, 20.71s/it][A

tensor(0.6255, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:34<21:02, 20.70s/it][A

loss: tensor(0.5022, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:55<20:39, 20.66s/it][A

tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:15<20:16, 20.61s/it][A

tensor(0.6179, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:36<19:55, 20.61s/it][A

tensor(0.7403, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:57<19:33, 20.58s/it][A

tensor(0.6329, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:17<19:11, 20.56s/it][A

loss: tensor(0.6815, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:38<18:50, 20.56s/it][A

tensor(0.6020, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:58<18:29, 20.55s/it][A

tensor(0.5708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:19<18:09, 20.56s/it][A

tensor(0.5671, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:39<17:48, 20.55s/it][A

tensor(0.6218, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:00<17:26, 20.52s/it][A

tensor(0.4941, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5593, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:20<17:06, 20.53s/it][A

loss: 


 41%|████      | 34/83 [11:41<16:46, 20.55s/it][A

tensor(0.7138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:02<16:27, 20.58s/it][A

tensor(0.5321, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:22<16:06, 20.57s/it][A

loss: tensor(0.5413, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:43<15:44, 20.53s/it][A

loss: tensor(0.6787, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:03<15:23, 20.51s/it][A

loss: tensor(0.5086, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:23<15:02, 20.51s/it][A

loss: tensor(0.6255, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:44<14:42, 20.52s/it][A

loss: tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:05<14:24, 20.59s/it][A

tensor(0.5142, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5593, device='cuda:0', grad_fn=<NllLossBackward>)


 51%|█████     | 42/83 [14:25<14:02, 20.54s/it][A

loss: 


 52%|█████▏    | 43/83 [14:46<13:41, 20.55s/it][A

tensor(0.7444, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:06<13:22, 20.59s/it][A

tensor(0.6068, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:27<13:04, 20.65s/it][A

tensor(0.5648, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:48<12:45, 20.68s/it][A

tensor(0.5989, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:09<12:25, 20.72s/it][A

tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:29<12:04, 20.71s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:50<11:45, 20.74s/it][A

tensor(0.5638, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.4991, device='cuda:0', grad_fn=<NllLossBackward>)


 60%|██████    | 50/83 [17:11<11:24, 20.74s/it][A

loss: 


 61%|██████▏   | 51/83 [17:32<11:05, 20.80s/it][A

tensor(0.6452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:53<10:43, 20.75s/it][A

tensor(0.6027, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:13<10:20, 20.70s/it][A

loss: tensor(0.7728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:34<10:00, 20.72s/it][A

tensor(0.5473, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:55<09:39, 20.69s/it][A

tensor(0.4503, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:15<09:17, 20.64s/it][A

tensor(0.7292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:36<08:56, 20.64s/it][A

tensor(0.5354, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:56<08:35, 20.64s/it][A

tensor(0.6585, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:17<08:14, 20.61s/it][A

tensor(0.5525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:38<07:56, 20.72s/it][A

tensor(0.4724, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:59<07:36, 20.77s/it][A

tensor(0.6380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:20<07:16, 20.79s/it][A

tensor(0.5071, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:40<06:55, 20.78s/it][A

tensor(0.6511, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:01<06:34, 20.75s/it][A

loss: tensor(0.5294, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:22<06:12, 20.71s/it][A

tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:42<05:52, 20.73s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:03<05:32, 20.76s/it][A

tensor(0.5150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:24<05:11, 20.77s/it][A

tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:45<04:51, 20.82s/it][A

tensor(0.6921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:06<04:30, 20.83s/it][A

tensor(0.6408, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:27<04:09, 20.80s/it][A

tensor(0.6069, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:47<03:48, 20.77s/it][A

tensor(0.6577, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:08<03:27, 20.78s/it][A

tensor(0.8172, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:29<03:06, 20.76s/it][A

tensor(0.7324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:50<02:46, 20.75s/it][A

tensor(0.8709, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:10<02:25, 20.74s/it][A

tensor(0.5973, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:31<02:04, 20.74s/it][A

tensor(0.5223, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:52<01:43, 20.72s/it][A

tensor(0.5239, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:12<01:22, 20.71s/it][A

loss: tensor(0.6385, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:33<01:02, 20.68s/it][A

tensor(0.5572, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:54<00:41, 20.67s/it][A

tensor(0.5889, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:14<00:20, 20.68s/it][A

loss: tensor(0.5416, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:21<00:00, 20.51s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(1.0073, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6064570280442755

	train acc: 0.6661589539978094

	training prec: 0.855405596309957

	training rec: 0.6661589539978094

	training f1: 0.7216167926633072

	Current Learning rate:  0.0005428571428571428



  2%|▏         | 1/42 [00:02<01:47,  2.62s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:20,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.67s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.67s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.66s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.66s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.67s/it][A
 43%|████▎     | 18/42 [00:47<01:04,  2.68s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.608995129664739

	Validation acc: 0.7579365079365079

	Validation prec: 0.8483880558003507

	Validation rec: 0.7579365079365079

	Validation f1: 0.7896669197017874
loss: 


  1%|          | 1/83 [00:20<28:37, 20.95s/it][A

tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:04, 20.80s/it][A

tensor(0.4739, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:02<27:33, 20.67s/it][A

loss: tensor(0.4928, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:22<27:13, 20.67s/it][A

loss: tensor(0.5916, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:55, 20.71s/it][A

tensor(0.6247, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:33, 20.69s/it][A

tensor(0.5187, device='cuda:0', grad_fn=<NllLossBackward>)



  8%|▊         | 7/83 [02:24<26:08, 20.63s/it][A

loss: tensor(0.6294, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:45<25:48, 20.65s/it][A

loss: tensor(0.5101, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:31, 20.69s/it][A

tensor(0.6046, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:26<25:11, 20.71s/it][A

loss: tensor(0.6638, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:48<25:02, 20.86s/it][A

tensor(0.5963, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:09<24:44, 20.91s/it][A

loss: tensor(0.5267, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:30<24:26, 20.94s/it][A

loss: tensor(0.5943, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:51<24:06, 20.97s/it][A

loss: tensor(0.6487, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:12<23:42, 20.92s/it][A

loss: tensor(0.5184, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:32<23:13, 20.81s/it][A

loss: tensor(0.6601, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:53<22:49, 20.75s/it][A

tensor(0.6846, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:13<22:26, 20.71s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:34<22:05, 20.71s/it][A

loss: tensor(0.7076, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:55<21:48, 20.77s/it][A

loss: tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:16<21:27, 20.77s/it][A

tensor(0.5052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:36<21:03, 20.72s/it][A

tensor(0.5839, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:57<20:42, 20.71s/it][A

tensor(0.5857, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:18<20:20, 20.69s/it][A

tensor(0.4819, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:38<19:58, 20.67s/it][A

tensor(0.4856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:59<19:38, 20.67s/it][A

tensor(0.5273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:20<19:17, 20.67s/it][A

tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:40<18:56, 20.67s/it][A

tensor(0.5767, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:01<18:36, 20.68s/it][A

tensor(0.5770, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:22<18:18, 20.72s/it][A

tensor(0.6359, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:43<17:57, 20.72s/it][A

loss: tensor(0.5539, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:03<17:36, 20.71s/it][A

loss: tensor(0.5637, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:24<17:14, 20.68s/it][A

loss: tensor(0.5339, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:45<16:54, 20.70s/it][A

tensor(0.5469, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:05<16:33, 20.70s/it][A

tensor(0.5064, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:26<16:14, 20.72s/it][A

tensor(0.6620, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:47<15:51, 20.69s/it][A

loss: tensor(0.5777, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5319, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:07<15:30, 20.68s/it][A

loss: 


 47%|████▋     | 39/83 [13:28<15:09, 20.66s/it][A

tensor(0.4867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:49<14:48, 20.66s/it][A

tensor(0.5796, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:09<14:27, 20.65s/it][A

tensor(0.4775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:30<14:07, 20.67s/it][A

tensor(0.4641, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:51<13:46, 20.66s/it][A

tensor(0.5963, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:11<13:25, 20.65s/it][A

tensor(0.5953, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:32<13:04, 20.65s/it][A

tensor(0.6269, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:53<12:44, 20.67s/it][A

tensor(0.6059, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:13<12:23, 20.66s/it][A

tensor(0.5569, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:34<12:02, 20.66s/it][A

tensor(0.7317, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:55<11:44, 20.72s/it][A

tensor(0.5238, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:15<11:23, 20.70s/it][A

tensor(0.5528, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:36<11:00, 20.65s/it][A

loss: tensor(0.5269, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:57<10:41, 20.69s/it][A

tensor(0.7878, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:18<10:22, 20.76s/it][A

loss: tensor(0.4864, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:38<10:02, 20.78s/it][A

tensor(0.5087, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:59<09:42, 20.80s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:20<09:21, 20.81s/it][A

tensor(0.4778, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5848, device='cuda:0', grad_fn=<NllLossBackward>)


 69%|██████▊   | 57/83 [19:41<09:00, 20.79s/it][A

loss: 


 70%|██████▉   | 58/83 [20:02<08:39, 20.79s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:22<08:19, 20.80s/it][A

tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:43<07:58, 20.79s/it][A

loss: tensor(0.7231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:04<07:36, 20.77s/it][A

tensor(0.5541, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:25<07:15, 20.76s/it][A

tensor(0.5829, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:45<06:54, 20.72s/it][A

tensor(0.5920, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.4798, device='cuda:0', grad_fn=<NllLossBackward>)


 77%|███████▋  | 64/83 [22:06<06:33, 20.71s/it][A

loss: 


 78%|███████▊  | 65/83 [22:27<06:13, 20.72s/it][A

tensor(0.5171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:47<05:52, 20.71s/it][A

tensor(0.5706, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:08<05:30, 20.67s/it][A

loss: tensor(0.5522, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:29<05:09, 20.64s/it][A

tensor(0.7449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:49<04:48, 20.63s/it][A

tensor(0.5380, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:10<04:27, 20.61s/it][A

tensor(0.5985, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:30<04:07, 20.60s/it][A

tensor(0.7373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:51<03:46, 20.58s/it][A

tensor(0.6145, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:11<03:25, 20.55s/it][A

tensor(0.5611, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:32<03:04, 20.53s/it][A

loss: tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:52<02:43, 20.50s/it][A

tensor(0.6361, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:13<02:23, 20.53s/it][A

tensor(0.5417, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [26:34<02:03, 20.60s/it][A

loss: tensor(0.4206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:55<01:43, 20.68s/it][A

tensor(0.6854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:15<01:22, 20.66s/it][A

tensor(0.6391, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:36<01:01, 20.65s/it][A

tensor(0.6034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:56<00:41, 20.64s/it][A

tensor(0.6594, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:17<00:20, 20.63s/it][A

loss: tensor(0.6382, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:24<00:00, 20.54s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.8530, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5858817280056965

	train acc: 0.6746132256297919

	training prec: 0.8545973634586324

	training rec: 0.6746132256297919

	training f1: 0.7284898225972686

	Current Learning rate:  0.0005142857142857142



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.64s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.66s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.67s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.66s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.66s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.68s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.66s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6060588501748585

	Validation acc: 0.6205357142857143

	Validation prec: 0.8649830857490939

	Validation rec: 0.6205357142857143

	Validation f1: 0.6880001797407521
loss: 


  1%|          | 1/83 [00:20<28:16, 20.69s/it][A

tensor(0.5704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:55, 20.68s/it][A

tensor(0.5619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:32, 20.66s/it][A

tensor(0.5365, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:19, 20.75s/it][A

tensor(0.5248, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<27:00, 20.78s/it][A

tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:41, 20.80s/it][A

tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:17, 20.75s/it][A

tensor(0.5126, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:54, 20.72s/it][A

tensor(0.5414, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:06<25:34, 20.74s/it][A

loss: tensor(0.5367, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:27<25:13, 20.74s/it][A

tensor(0.6104, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:50, 20.70s/it][A

tensor(0.6051, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:26, 20.65s/it][A

tensor(0.5580, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:29<24:05, 20.66s/it][A

loss: tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:49<23:45, 20.66s/it][A

loss: tensor(0.5363, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:28, 20.72s/it][A

tensor(0.7365, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:31<23:07, 20.70s/it][A

loss: tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:51<22:43, 20.65s/it][A

loss: tensor(0.5424, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:21, 20.64s/it][A

tensor(0.5400, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:33<21:59, 20.62s/it][A

loss: tensor(0.6196, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:53<21:38, 20.62s/it][A

loss: tensor(0.7511, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:14<21:21, 20.67s/it][A

loss: tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:35<21:03, 20.71s/it][A

loss: tensor(0.7130, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:56<20:42, 20.71s/it][A

loss: tensor(0.6794, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:16<20:20, 20.69s/it][A

loss: tensor(0.5912, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:37<19:57, 20.65s/it][A

loss: tensor(0.6369, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [08:57<19:35, 20.62s/it][A

loss: tensor(0.4715, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:18<19:13, 20.59s/it][A

loss: tensor(0.6327, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:39<18:56, 20.66s/it][A

tensor(0.5782, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:59<18:37, 20.70s/it][A

tensor(0.5010, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:19<17:59, 20.36s/it][A

tensor(0.6103, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:40<17:43, 20.45s/it][A

loss: tensor(0.6022, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:01<17:32, 20.64s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:22<17:17, 20.74s/it][A

tensor(0.6069, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:43<17:00, 20.82s/it][A

tensor(0.6173, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:03<16:36, 20.75s/it]

loss: tensor(0.5772, device='cuda:0', grad_fn=<NllLossBackward>)


[A
 43%|████▎     | 36/83 [12:24<16:13, 20.72s/it][A

loss: tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:45<15:51, 20.69s/it][A

tensor(0.5664, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:05<15:30, 20.67s/it][A

tensor(0.6475, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:26<15:09, 20.68s/it][A

loss: tensor(0.5518, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:47<14:50, 20.71s/it][A

tensor(0.6047, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:08<14:32, 20.77s/it][A

tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:28<14:11, 20.78s/it][A

tensor(0.6356, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:49<13:50, 20.77s/it][A

loss: tensor(0.5905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:10<13:29, 20.75s/it][A

tensor(0.5312, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:31<13:09, 20.77s/it][A

tensor(0.5960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7489, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:51<12:47, 20.75s/it][A

loss: 


 57%|█████▋    | 47/83 [16:12<12:28, 20.78s/it][A

tensor(0.5313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:33<12:07, 20.79s/it][A

tensor(0.5425, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:54<11:48, 20.85s/it][A

tensor(0.5245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:15<11:30, 20.91s/it][A

tensor(0.7052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:36<11:10, 20.94s/it][A

tensor(0.7689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:57<10:48, 20.93s/it][A

tensor(0.5922, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:18<10:28, 20.96s/it][A

tensor(0.5467, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:39<10:07, 20.95s/it][A

tensor(0.5489, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:00<09:46, 20.96s/it][A

tensor(0.5567, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:21<09:26, 20.98s/it][A

tensor(0.6210, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:42<09:04, 20.96s/it][A

tensor(0.6004, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:02<08:41, 20.84s/it][A

loss: tensor(0.5834, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5802, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:23<08:18, 20.79s/it][A
 72%|███████▏  | 60/83 [20:44<07:57, 20.74s/it][A

loss: tensor(0.5896, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:04<07:36, 20.73s/it][A

tensor(0.5804, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:25<07:15, 20.73s/it][A

tensor(0.6058, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:46<06:54, 20.73s/it][A

loss: tensor(0.5587, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6313, device='cuda:0', grad_fn=<NllLossBackward>)


 77%|███████▋  | 64/83 [22:07<06:34, 20.77s/it][A

loss: 


 78%|███████▊  | 65/83 [22:28<06:14, 20.78s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:48<05:53, 20.77s/it][A

tensor(0.5335, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:09<05:33, 20.82s/it][A

tensor(0.5374, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:30<05:13, 20.87s/it][A

tensor(0.5513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:51<04:51, 20.83s/it][A

tensor(0.5582, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:12<04:30, 20.77s/it][A

tensor(0.7140, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:32<04:08, 20.74s/it][A

tensor(0.7817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:53<03:47, 20.72s/it][A

tensor(0.6026, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:14<03:26, 20.70s/it][A

tensor(0.6596, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:34<03:05, 20.64s/it][A

loss: tensor(0.5360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:55<02:44, 20.62s/it][A

tensor(0.5427, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:15<02:24, 20.61s/it][A

tensor(0.5422, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:36<02:03, 20.59s/it][A

tensor(0.5982, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [26:56<01:42, 20.58s/it][A

loss: tensor(0.5658, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:17<01:22, 20.54s/it][A

loss: tensor(0.6057, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:37<01:01, 20.57s/it][A

tensor(0.5775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5920, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [27:58<00:41, 20.55s/it][A

loss: 


 99%|█████████▉| 82/83 [28:19<00:20, 20.56s/it][A

tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:26<00:00, 20.56s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5038, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5977511036108776

	train acc: 0.6548980010952903

	training prec: 0.8529816230660238

	training rec: 0.6548980010952903

	training f1: 0.7144241134536682

	Current Learning rate:  0.0004857142857142857



  2%|▏         | 1/42 [00:02<01:48,  2.66s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.63s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.64s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.62s/it][A
 17%|█▋        | 7/42 [00:18<01:31,  2.62s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.62s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.62s/it][A
 24%|██▍       | 10/42 [00:26<01:23,  2.62s/it][A
 26%|██▌       | 11/42 [00:28<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.61s/it][A
 31%|███       | 13/42 [00:34<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.62s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.61s/it][A
 38%|███▊      | 16/42 [00:41<01:07,  2.61s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.62s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.61s/it][A
 45%|████▌     | 19/42 [00:49<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.5912814268044063

	Validation acc: 0.6532738095238095

	Validation prec: 0.8565091134722762

	Validation rec: 0.6532738095238095

	Validation f1: 0.7132888388047369



  1%|          | 1/83 [00:20<28:12, 20.64s/it][A

loss: tensor(0.6485, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:48, 20.60s/it][A

tensor(0.5506, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:28, 20.61s/it][A

tensor(0.5701, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:07, 20.60s/it][A

tensor(0.6050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:42<26:45, 20.59s/it][A

tensor(0.5314, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 6/83 [02:03<26:23, 20.56s/it][A

loss: tensor(0.7011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:02, 20.55s/it][A

tensor(0.5842, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:44<25:39, 20.53s/it][A

loss: tensor(0.5908, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:05<25:21, 20.56s/it][A

loss: tensor(0.6522, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:25<25:01, 20.57s/it][A

loss: tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:46<24:41, 20.58s/it][A

loss: tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:07<24:24, 20.63s/it][A

tensor(0.6291, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:27<24:05, 20.64s/it][A

tensor(0.5260, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:48<23:42, 20.62s/it][A

tensor(0.6118, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:08<23:19, 20.59s/it][A

loss: tensor(0.5272, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:29<22:58, 20.58s/it][A

loss: tensor(0.6708, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:49<22:38, 20.58s/it][A

loss: tensor(0.5178, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:10<22:19, 20.61s/it][A

loss: tensor(0.6799, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:31<21:59, 20.62s/it][A

tensor(0.4772, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:51<21:39, 20.63s/it][A

loss: tensor(0.5710, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:12<21:19, 20.64s/it][A

tensor(0.4961, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:33<20:59, 20.64s/it][A

loss: tensor(0.5881, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:53<20:39, 20.65s/it][A

loss: tensor(0.6953, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:14<20:21, 20.70s/it][A

tensor(0.7910, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:35<19:59, 20.67s/it][A

tensor(0.7481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:56<19:38, 20.67s/it][A

tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:16<19:17, 20.67s/it][A

tensor(0.5118, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:37<18:57, 20.68s/it][A

tensor(0.5243, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:58<18:37, 20.69s/it][A

tensor(0.5966, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:18<18:16, 20.68s/it][A

loss: tensor(0.6576, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:39<17:55, 20.69s/it][A

loss: tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)


 39%|███▊      | 32/83 [11:00<17:33, 20.67s/it][A
 40%|███▉      | 33/83 [11:20<17:12, 20.65s/it][A

loss: tensor(0.4981, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:41<16:53, 20.68s/it][A

loss: tensor(0.5347, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:02<16:33, 20.70s/it][A

loss: tensor(0.7223, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:23<16:14, 20.74s/it][A

loss: tensor(0.6552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:44<15:59, 20.85s/it][A

tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:04<15:37, 20.83s/it][A

tensor(0.6959, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:25<15:17, 20.85s/it][A

tensor(0.5682, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:46<14:56, 20.86s/it][A

tensor(0.5700, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:07<14:34, 20.82s/it][A

loss: tensor(0.4665, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:28<14:14, 20.85s/it][A

tensor(0.5960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:49<13:53, 20.84s/it][A

tensor(0.5757, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:10<13:34, 20.88s/it][A

tensor(0.5504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:30<13:13, 20.88s/it][A

tensor(0.5327, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:51<12:52, 20.87s/it][A

tensor(0.7748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:12<12:31, 20.86s/it][A

tensor(0.5445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:33<12:11, 20.91s/it][A

tensor(0.6217, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:54<11:50, 20.89s/it][A

tensor(0.5220, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:15<11:25, 20.78s/it][A

tensor(0.6034, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5914, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:35<11:04, 20.75s/it][A

loss: 


 63%|██████▎   | 52/83 [17:56<10:41, 20.69s/it][A

tensor(0.6556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:16<10:19, 20.65s/it][A

tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:37<09:58, 20.64s/it][A

tensor(0.5859, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [18:58<09:37, 20.61s/it][A

loss: tensor(0.5917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:18<09:16, 20.60s/it][A

tensor(0.5668, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:39<08:55, 20.61s/it][A

tensor(0.5333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:59<08:35, 20.64s/it][A

tensor(0.7172, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:20<08:14, 20.60s/it][A

loss: tensor(0.5466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:41<07:54, 20.61s/it][A

tensor(0.5072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:01<07:33, 20.63s/it][A

tensor(0.6867, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:22<07:13, 20.65s/it][A

tensor(0.5444, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:43<06:52, 20.65s/it][A

loss: tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:03<06:31, 20.63s/it][A

tensor(0.4899, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:24<06:10, 20.60s/it][A

tensor(0.6325, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:44<05:50, 20.61s/it][A

tensor(0.5399, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6242, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:05<05:30, 20.64s/it][A
 82%|████████▏ | 68/83 [23:26<05:09, 20.65s/it][A

loss: tensor(0.7095, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:46<04:49, 20.64s/it][A

tensor(0.5730, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:07<04:28, 20.63s/it][A

tensor(0.5430, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:28<04:07, 20.65s/it][A

tensor(0.6795, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:48<03:47, 20.64s/it][A

tensor(0.4588, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:09<03:26, 20.65s/it][A

tensor(0.5822, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:30<03:05, 20.64s/it][A

tensor(0.8685, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6651, device='cuda:0', grad_fn=<NllLossBackward>)


 90%|█████████ | 75/83 [25:50<02:45, 20.63s/it][A
 92%|█████████▏| 76/83 [26:11<02:24, 20.63s/it][A

loss: tensor(0.6593, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [26:31<02:03, 20.64s/it][A

loss: tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:52<01:43, 20.66s/it][A

tensor(0.6608, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:13<01:22, 20.64s/it][A

loss: tensor(0.4737, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:33<01:01, 20.65s/it][A

loss: tensor(0.5609, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:54<00:41, 20.69s/it][A

tensor(0.6206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:15<00:20, 20.68s/it][A

tensor(0.4703, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:22<00:00, 20.51s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6592, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6036295826176563

	train acc: 0.6697357612267251

	training prec: 0.854659129520366

	training rec: 0.6697357612267251

	training f1: 0.7240922811089806

	Current Learning rate:  0.00045714285714285713



  2%|▏         | 1/42 [00:02<01:46,  2.60s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:44,  2.68s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.64s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.67s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.66s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.65s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.5882213548535392

	Validation acc: 0.6609623015873016

	Validation prec: 0.8604104900046959

	Validation rec: 0.6609623015873016

	Validation f1: 0.7217636058977386
loss: 


  1%|          | 1/83 [00:20<28:28, 20.84s/it][A

tensor(0.5254, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:08, 20.84s/it][A

tensor(0.5944, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:45, 20.81s/it][A

tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:23<27:20, 20.77s/it][A

loss: tensor(0.6672, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<27:01, 20.78s/it][A

tensor(0.6035, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:37, 20.75s/it][A

tensor(0.5675, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:17, 20.75s/it][A

tensor(0.6771, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:46<25:53, 20.71s/it][A

loss: tensor(0.5574, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:33, 20.72s/it][A

tensor(0.5564, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:27<25:12, 20.72s/it][A

tensor(0.7124, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:48<24:51, 20.71s/it][A

tensor(0.5316, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:28, 20.69s/it][A

tensor(0.6088, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:07, 20.68s/it][A

tensor(0.5706, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:50<23:44, 20.64s/it][A

loss: tensor(0.6306, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:26, 20.68s/it][A

tensor(0.5566, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:31<23:05, 20.68s/it][A

loss: tensor(0.5589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:52<22:45, 20.69s/it][A

tensor(0.6193, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:23, 20.67s/it][A

tensor(0.6277, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:33<22:01, 20.64s/it][A

loss: tensor(0.7108, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:54<21:41, 20.65s/it][A

loss: tensor(0.6698, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:14<21:21, 20.68s/it][A

loss: tensor(0.5677, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:35<21:01, 20.68s/it][A

loss: tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:56<20:42, 20.71s/it][A

tensor(0.5141, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:16<20:21, 20.71s/it][A

loss: tensor(0.5730, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:37<20:02, 20.74s/it][A

tensor(0.5608, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:58<19:43, 20.76s/it][A

tensor(0.6093, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.4644, device='cuda:0', grad_fn=<NllLossBackward>)


 33%|███▎      | 27/83 [09:19<19:23, 20.77s/it][A

loss: 


 34%|███▎      | 28/83 [09:39<18:55, 20.65s/it][A

tensor(0.6400, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:00<18:32, 20.60s/it][A

tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:20<18:13, 20.62s/it][A

loss: tensor(0.5054, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:41<17:54, 20.67s/it][A

tensor(0.4853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:02<17:33, 20.66s/it][A

tensor(0.5498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:23<17:13, 20.67s/it][A

tensor(0.4166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:43<16:52, 20.66s/it][A

tensor(0.6533, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:04<16:32, 20.68s/it][A

tensor(0.7236, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:25<16:12, 20.68s/it][A

loss: tensor(0.6879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:45<15:51, 20.68s/it][A

tensor(0.7303, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:06<15:30, 20.67s/it][A

loss: tensor(0.5114, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:27<15:09, 20.67s/it][A

loss: tensor(0.5220, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:47<14:49, 20.68s/it][A

loss: tensor(0.5392, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:08<14:27, 20.66s/it][A

tensor(0.5488, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:29<14:06, 20.65s/it][A

tensor(0.6603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:49<13:46, 20.66s/it][A

tensor(0.6642, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:10<13:25, 20.65s/it][A

tensor(0.5125, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:31<13:05, 20.66s/it][A

tensor(0.6433, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:51<12:44, 20.66s/it][A

tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:12<12:23, 20.65s/it][A

tensor(0.4711, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:32<12:03, 20.66s/it][A

tensor(0.5394, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:53<11:42, 20.66s/it][A

tensor(0.5753, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:14<11:23, 20.70s/it][A

tensor(0.5563, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:35<11:01, 20.69s/it][A

tensor(0.5636, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:55<10:41, 20.69s/it][A

tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:16<10:20, 20.69s/it][A

tensor(0.7009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:37<09:59, 20.69s/it][A

tensor(0.5435, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:57<09:38, 20.68s/it][A

tensor(0.5938, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:18<09:18, 20.67s/it][A

tensor(0.5114, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:39<08:58, 20.71s/it][A

tensor(0.5539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:59<08:37, 20.72s/it][A

tensor(0.5974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:20<08:16, 20.69s/it][A

tensor(0.5196, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:41<07:55, 20.68s/it][A

tensor(0.5271, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [21:01<07:35, 20.68s/it][A

loss: 


 75%|███████▍  | 62/83 [21:22<07:14, 20.68s/it][A

tensor(0.5960, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:43<06:53, 20.67s/it][A

tensor(0.6098, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:04<06:33, 20.70s/it][A

tensor(0.6102, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:24<06:12, 20.70s/it][A

tensor(0.5791, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7392, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [22:45<05:52, 20.72s/it][A

loss: 


 81%|████████  | 67/83 [23:06<05:31, 20.73s/it][A

tensor(0.6050, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:27<05:11, 20.76s/it][A

tensor(0.4492, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:47<04:50, 20.74s/it][A

tensor(0.4719, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:08<04:29, 20.73s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:29<04:08, 20.71s/it][A

tensor(0.4510, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [24:49<03:47, 20.70s/it][A

loss: tensor(0.7351, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:10<03:26, 20.68s/it][A

tensor(0.5892, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:31<03:06, 20.68s/it][A

tensor(0.5200, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [25:51<02:45, 20.68s/it][A

loss: tensor(0.5383, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:12<02:24, 20.67s/it][A

tensor(0.4776, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [26:33<02:04, 20.67s/it][A

loss: tensor(0.4920, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:53<01:43, 20.66s/it][A

tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:14<01:22, 20.65s/it][A

tensor(0.5998, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:35<01:01, 20.66s/it][A

tensor(0.4857, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [27:55<00:41, 20.63s/it][A

loss: tensor(0.5543, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:16<00:20, 20.61s/it][A

loss: tensor(0.5257, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:23<00:00, 20.52s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.9403, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5857605704342026

	train acc: 0.6781729189485214

	training prec: 0.8573022911681751

	training rec: 0.6781729189485214

	training f1: 0.7293285304242424

	Current Learning rate:  0.00042857142857142855



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.63s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.64s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.63s/it][A
 29%|██▊       | 12/42 [00:31<01:18,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:15,  2.61s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.61s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.62s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.61s/it][A
 43%|████▎     | 18/42 [00:47<01:02,  2.62s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.63s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.6055491059308961

	Validation acc: 0.7596726190476191

	Validation prec: 0.8572366918055272

	Validation rec: 0.7596726190476191

	Validation f1: 0.7935191932044383
loss: 


  1%|          | 1/83 [00:20<28:08, 20.59s/it][A

tensor(0.5933, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:45, 20.56s/it][A

tensor(0.4989, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:24, 20.55s/it][A

tensor(0.5322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:04, 20.57s/it][A

tensor(0.5909, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6389, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:42<26:42, 20.55s/it][A

loss: 


  7%|▋         | 6/83 [02:03<26:26, 20.61s/it][A

tensor(0.5178, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:08, 20.64s/it][A

tensor(0.5905, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:44<25:47, 20.64s/it][A

tensor(0.6129, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:05<25:27, 20.64s/it][A

tensor(0.6618, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:26<25:09, 20.67s/it][A

loss: tensor(0.6234, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5512, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:46<24:49, 20.68s/it][A


loss: tensor(0.5914, device='cuda:0', grad_fn=<NllLossBackward>)


 14%|█▍        | 12/83 [04:07<24:27, 20.68s/it][A
 16%|█▌        | 13/83 [04:28<24:06, 20.66s/it][A

loss: tensor(0.6795, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:48<23:45, 20.67s/it][A

loss: tensor(0.6263, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:09<23:25, 20.68s/it][A

tensor(0.7138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:30<23:05, 20.68s/it][A

tensor(0.6057, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:50<22:42, 20.64s/it][A

tensor(0.5606, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:11<22:22, 20.65s/it][A

tensor(0.6590, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:32<22:00, 20.64s/it][A

loss: tensor(0.6400, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:52<21:38, 20.62s/it][A

loss: tensor(0.6983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:13<21:21, 20.66s/it][A

tensor(0.6136, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:34<21:01, 20.69s/it][A

tensor(0.4897, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:54<20:40, 20.68s/it][A

tensor(0.5626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:15<20:19, 20.67s/it][A

tensor(0.7597, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:36<19:58, 20.66s/it][A

tensor(0.4997, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:56<19:37, 20.66s/it][A

tensor(0.5952, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:17<19:19, 20.71s/it][A

tensor(0.4964, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:38<19:00, 20.74s/it][A

tensor(0.5413, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [09:59<18:37, 20.69s/it][A

loss: tensor(0.5588, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:19<18:15, 20.67s/it][A

tensor(0.5107, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:40<17:55, 20.68s/it][A

loss: tensor(0.6074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:00<17:33, 20.66s/it][A

tensor(0.5836, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:21<17:13, 20.66s/it][A

tensor(0.6032, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:42<16:52, 20.67s/it][A

loss: tensor(0.6660, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:03<16:32, 20.68s/it][A

tensor(0.5336, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:23<16:12, 20.69s/it][A

tensor(0.6085, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:44<15:51, 20.68s/it][A

tensor(0.7087, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:05<15:30, 20.68s/it][A

tensor(0.6108, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:25<15:09, 20.68s/it][A

tensor(0.4974, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.4987, device='cuda:0', grad_fn=<NllLossBackward>)


 48%|████▊     | 40/83 [13:46<14:49, 20.69s/it][A
 49%|████▉     | 41/83 [14:07<14:29, 20.69s/it][A

loss: tensor(0.6334, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:27<14:08, 20.68s/it][A

tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:48<13:47, 20.69s/it][A

tensor(0.7210, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:09<13:27, 20.69s/it][A

tensor(0.5252, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:29<13:06, 20.70s/it][A

tensor(0.5448, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:50<12:46, 20.70s/it][A

loss: tensor(0.6852, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:11<12:24, 20.68s/it][A

loss: tensor(0.5452, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:31<12:03, 20.67s/it]

loss: tensor(0.6652, device='cuda:0', grad_fn=<NllLossBackward>)


[A
 59%|█████▉    | 49/83 [16:52<11:43, 20.68s/it][A

loss: tensor(0.7598, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:13<11:21, 20.66s/it][A

tensor(0.5233, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:33<11:01, 20.67s/it][A

loss: tensor(0.6106, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:54<10:41, 20.70s/it][A

tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:15<10:21, 20.72s/it][A

loss: tensor(0.5485, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:36<10:01, 20.73s/it][A

tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:56<09:40, 20.72s/it][A

tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:17<09:20, 20.77s/it][A

tensor(0.5933, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:38<08:57, 20.68s/it][A

tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:58<08:36, 20.66s/it][A

tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:19<08:15, 20.65s/it][A

tensor(0.5730, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:40<07:55, 20.66s/it][A

tensor(0.6527, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [21:00<07:33, 20.63s/it][A

loss: tensor(0.5511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:21<07:13, 20.62s/it][A

tensor(0.4998, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:41<06:51, 20.57s/it][A

tensor(0.5415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:02<06:30, 20.58s/it][A

tensor(0.6379, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:22<06:10, 20.56s/it][A

tensor(0.5404, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:43<05:49, 20.57s/it][A

tensor(0.6067, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:04<05:28, 20.56s/it][A

tensor(0.6991, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:24<05:08, 20.55s/it][A

tensor(0.7554, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [23:44<04:47, 20.51s/it][A

loss: tensor(0.6677, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:05<04:26, 20.53s/it][A

loss: tensor(0.5790, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:26<04:06, 20.52s/it][A

loss: tensor(0.5917, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [24:46<03:46, 20.59s/it][A

loss: tensor(0.5419, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:07<03:26, 20.61s/it][A

tensor(0.6598, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:28<03:05, 20.61s/it][A

loss: tensor(0.7819, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6282, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [25:48<02:44, 20.61s/it][A

loss: 


 92%|█████████▏| 76/83 [26:09<02:23, 20.56s/it][A

tensor(0.6224, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:29<02:03, 20.56s/it][A

tensor(0.6279, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:50<01:42, 20.54s/it][A

tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:10<01:22, 20.53s/it][A

tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:31<01:01, 20.51s/it][A

tensor(0.5901, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:52<00:41, 20.61s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:12<00:20, 20.67s/it][A

tensor(0.5474, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:19<00:00, 20.48s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7613, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6066936268863907

	train acc: 0.647573247535597

	training prec: 0.8493768128537698

	training rec: 0.647573247535597

	training f1: 0.7061844090889228

	Current Learning rate:  0.0004



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.67s/it][A
  7%|▋         | 3/42 [00:07<01:42,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.63s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.66s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.64s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.65s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.64s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.601055512825648

	Validation acc: 0.6212797619047619

	Validation prec: 0.86539982110454

	Validation rec: 0.6212797619047619

	Validation f1: 0.6916582619401402



  1%|          | 1/83 [00:20<28:14, 20.67s/it][A

loss: tensor(0.5684, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:41<27:53, 20.66s/it][A

loss: tensor(0.5703, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:37, 20.72s/it][A

tensor(0.6361, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:20, 20.77s/it][A

tensor(0.6783, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:43<26:59, 20.76s/it][A

loss: tensor(0.5654, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:37, 20.75s/it][A

tensor(0.5451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5199, device='cuda:0', grad_fn=<NllLossBackward>)



  8%|▊         | 7/83 [02:25<26:17, 20.75s/it][A
 10%|▉         | 8/83 [02:45<25:55, 20.74s/it][A

loss: tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:33, 20.72s/it][A

tensor(0.5953, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:27<25:13, 20.73s/it][A

tensor(0.6988, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:48<24:52, 20.73s/it][A

loss: tensor(0.5549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:33, 20.76s/it][A

tensor(0.5887, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:09, 20.70s/it][A

tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:49<23:42, 20.62s/it][A

loss: tensor(0.7017, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:10<23:19, 20.58s/it][A

loss: tensor(0.4401, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:30<22:56, 20.54s/it][A

loss: tensor(0.6942, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:51<22:38, 20.58s/it][A

loss: tensor(0.5687, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:12<22:17, 20.58s/it][A

loss: tensor(0.5171, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:32<21:59, 20.62s/it][A

tensor(0.5636, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:53<21:40, 20.64s/it][A

tensor(0.7038, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:13<21:16, 20.59s/it][A

loss: tensor(0.6206, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:34<20:54, 20.57s/it][A

tensor(0.6149, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:55<20:35, 20.60s/it][A

tensor(0.5559, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:15<20:13, 20.57s/it][A

loss: tensor(0.5845, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:36<19:52, 20.57s/it][A

tensor(0.5275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:56<19:32, 20.56s/it][A

tensor(0.6104, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:17<19:10, 20.54s/it][A

loss: tensor(0.6791, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:37<18:50, 20.55s/it][A

tensor(0.7721, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:58<18:30, 20.57s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:19<18:13, 20.62s/it][A

tensor(0.6177, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:39<17:55, 20.68s/it][A

tensor(0.6873, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:00<17:35, 20.69s/it][A

tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:21<17:11, 20.62s/it][A

tensor(0.6036, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:41<16:49, 20.61s/it][A

tensor(0.4932, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:02<16:27, 20.58s/it][A

tensor(0.6130, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:23<16:10, 20.66s/it][A

tensor(0.6048, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:43<15:48, 20.62s/it][A

loss: tensor(0.5813, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:04<15:29, 20.65s/it][A

tensor(0.5537, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:24<15:07, 20.63s/it][A

loss: tensor(0.5358, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:45<14:47, 20.64s/it][A

loss: tensor(0.4859, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:06<14:25, 20.61s/it][A

loss: tensor(0.6454, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:26<14:04, 20.61s/it][A

loss: tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:47<13:44, 20.62s/it][A

loss: tensor(0.5512, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:07<13:23, 20.61s/it][A

loss: tensor(0.6740, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:28<13:02, 20.59s/it][A

loss: tensor(0.6350, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:49<12:44, 20.65s/it][A

tensor(0.5160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:09<12:22, 20.64s/it][A

tensor(0.5659, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:30<12:02, 20.64s/it][A

tensor(0.5447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:51<11:41, 20.63s/it][A

tensor(0.4710, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:11<11:21, 20.65s/it][A

tensor(0.6264, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:32<11:00, 20.65s/it][A

tensor(0.6062, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:53<10:39, 20.64s/it][A

tensor(0.6213, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:13<10:17, 20.59s/it][A

tensor(0.5940, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:34<09:57, 20.59s/it][A

loss: tensor(0.5685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:54<09:36, 20.59s/it][A

tensor(0.5397, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:15<09:15, 20.57s/it][A

tensor(0.4994, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:35<08:55, 20.59s/it][A

tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [19:56<08:34, 20.58s/it][A

loss: tensor(0.6130, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:17<08:13, 20.58s/it][A

tensor(0.5408, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:37<07:53, 20.58s/it][A

tensor(0.4917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:58<07:33, 20.61s/it][A

tensor(0.6246, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:19<07:13, 20.64s/it][A

tensor(0.4875, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:39<06:53, 20.66s/it][A

loss: tensor(0.5991, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:00<06:33, 20.70s/it][A

tensor(0.6439, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:21<06:12, 20.70s/it][A

loss: tensor(0.6202, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:42<05:52, 20.72s/it][A

tensor(0.5743, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:02<05:31, 20.74s/it][A

loss: tensor(0.6503, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:23<05:11, 20.76s/it][A

tensor(0.6747, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:44<04:50, 20.77s/it][A

tensor(0.6173, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:05<04:29, 20.74s/it][A

tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:25<04:08, 20.73s/it][A

tensor(0.6109, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [24:46<03:47, 20.72s/it][A

loss: tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5254, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:07<03:26, 20.70s/it][A
 89%|████████▉ | 74/83 [25:27<03:06, 20.69s/it][A

loss: tensor(0.5685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:48<02:45, 20.70s/it][A

tensor(0.5281, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:09<02:25, 20.72s/it][A

tensor(0.4890, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:30<02:04, 20.76s/it][A

tensor(0.5509, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:50<01:43, 20.77s/it][A

tensor(0.5079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:11<01:22, 20.69s/it][A

tensor(0.5720, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:32<01:02, 20.71s/it][A

tensor(0.5546, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:52<00:41, 20.73s/it][A

tensor(0.6959, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:14<00:20, 20.84s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:21<00:00, 20.50s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.3453, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.589846966137369

	train acc: 0.6602204271631983

	training prec: 0.8544111397521403

	training rec: 0.6602204271631983

	training f1: 0.7174647839762254

	Current Learning rate:  0.00037142857142857143



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:44,  2.62s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.65s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.68s/it][A
 14%|█▍        | 6/42 [00:16<01:36,  2.69s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.67s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.67s/it][A
 21%|██▏       | 9/42 [00:24<01:28,  2.67s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.66s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.66s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.67s/it][A
 36%|███▌      | 15/42 [00:39<01:12,  2.67s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.66s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.67s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.64s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.5901853477671033

	Validation acc: 0.7192460317460317

	Validation prec: 0.8499721300252093

	Validation rec: 0.7192460317460317

	Validation f1: 0.7637371313377318



  1%|          | 1/83 [00:20<28:20, 20.74s/it][A

loss: tensor(0.5319, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:41<27:58, 20.72s/it][A

loss: tensor(0.7045, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:02<27:39, 20.75s/it][A

loss: tensor(0.5511, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:22<27:18, 20.74s/it][A

loss: tensor(0.5931, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<27:01, 20.79s/it][A

tensor(0.5646, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 6/83 [02:04<26:43, 20.82s/it][A

loss: tensor(0.5907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:25<26:23, 20.84s/it][A

tensor(0.5531, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:46<26:04, 20.86s/it][A

loss: tensor(0.5790, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:07<25:40, 20.82s/it][A

loss: tensor(0.6248, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:28<25:21, 20.84s/it][A

tensor(0.8350, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:48<25:00, 20.84s/it][A

tensor(0.5571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:09<24:42, 20.89s/it][A

tensor(0.5903, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:30<24:21, 20.88s/it][A

tensor(0.5549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:51<24:00, 20.88s/it][A

tensor(0.8176, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:12<23:40, 20.89s/it][A

tensor(0.6202, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:33<23:17, 20.86s/it][A

tensor(0.5949, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:54<22:55, 20.84s/it][A

loss: tensor(0.5471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:14<22:33, 20.83s/it][A

tensor(0.6038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:35<22:13, 20.83s/it][A

tensor(0.6560, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:56<21:50, 20.80s/it][A

tensor(0.6236, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:17<21:27, 20.77s/it][A

loss: tensor(0.6094, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:37<21:04, 20.73s/it][A

loss: tensor(0.5350, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:58<20:44, 20.74s/it][A

tensor(0.6779, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:19<20:24, 20.75s/it][A

loss: tensor(0.4825, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:40<20:05, 20.79s/it][A

loss: tensor(0.5921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:01<19:48, 20.85s/it][A

tensor(0.5561, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:22<19:26, 20.84s/it][A

tensor(0.5776, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:42<19:04, 20.81s/it][A

loss: tensor(0.5519, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:03<18:43, 20.80s/it][A

tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:24<18:21, 20.78s/it][A

tensor(0.6937, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:45<17:59, 20.75s/it][A

loss: tensor(0.6114, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:05<17:35, 20.70s/it][A

tensor(0.5509, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:26<17:13, 20.66s/it][A

loss: tensor(0.7273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:46<16:51, 20.64s/it][A

tensor(0.7272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:07<16:30, 20.64s/it][A

tensor(0.5366, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:27<16:08, 20.61s/it][A

loss: tensor(0.5985, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:48<15:47, 20.59s/it][A

tensor(0.6544, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 38/83 [13:09<15:28, 20.64s/it][A

loss: tensor(0.6513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5764, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:29<15:08, 20.64s/it][A

loss: 


 48%|████▊     | 40/83 [13:50<14:49, 20.68s/it][A

tensor(0.6322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:11<14:29, 20.70s/it][A

tensor(0.5745, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:32<14:09, 20.71s/it][A

tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:52<13:49, 20.74s/it][A

loss: tensor(0.5761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:13<13:28, 20.74s/it][A

tensor(0.5987, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:34<13:07, 20.73s/it][A

tensor(0.6073, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:55<12:47, 20.74s/it][A

loss: tensor(0.7253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:16<12:28, 20.79s/it][A

tensor(0.6679, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:36<12:06, 20.77s/it][A

tensor(0.6322, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:57<11:46, 20.79s/it][A

tensor(0.5820, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:18<11:25, 20.78s/it][A

loss: tensor(0.5248, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5535, device='cuda:0', grad_fn=<NllLossBackward>)


 61%|██████▏   | 51/83 [17:39<11:04, 20.75s/it][A

loss: 


 63%|██████▎   | 52/83 [17:59<10:43, 20.75s/it][A

tensor(0.7189, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:20<10:22, 20.74s/it][A

tensor(0.6080, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:41<10:01, 20.73s/it][A

loss: tensor(0.6076, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:01<09:40, 20.73s/it][A

tensor(0.5035, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:22<09:19, 20.72s/it][A

tensor(0.6568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:43<08:58, 20.70s/it][A

tensor(0.5313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:04<08:37, 20.69s/it][A

tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:24<08:17, 20.73s/it][A

tensor(0.5277, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:45<07:56, 20.71s/it][A

tensor(0.5390, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:06<07:34, 20.68s/it][A

tensor(0.7529, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:26<07:12, 20.59s/it][A

loss: tensor(0.5145, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:46<06:50, 20.55s/it][A

tensor(0.6698, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:07<06:30, 20.57s/it][A

tensor(0.6474, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:28<06:10, 20.59s/it][A

tensor(0.7433, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:48<05:50, 20.63s/it][A

tensor(0.5229, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:09<05:29, 20.62s/it][A

loss: tensor(0.5545, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:30<05:09, 20.61s/it][A

loss: tensor(0.5242, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [23:50<04:48, 20.60s/it][A

loss: tensor(0.6202, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:11<04:27, 20.59s/it][A

loss: tensor(0.6527, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:32<04:07, 20.66s/it][A

tensor(0.5760, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:52<03:47, 20.66s/it][A

tensor(0.6123, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:13<03:26, 20.67s/it][A

tensor(0.6192, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:34<03:06, 20.68s/it][A

tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:54<02:45, 20.67s/it][A

tensor(0.5898, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:15<02:24, 20.68s/it][A

tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:36<02:04, 20.70s/it][A

tensor(0.6022, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:56<01:43, 20.70s/it][A

tensor(0.5586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:17<01:22, 20.69s/it][A

tensor(0.5157, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:38<01:02, 20.73s/it][A

tensor(0.5397, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:59<00:41, 20.71s/it][A

tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:19<00:20, 20.73s/it][A

tensor(0.6397, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:26<00:00, 20.57s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5144, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.6065230638865965

	train acc: 0.6614526286966046

	training prec: 0.8529723039655506

	training rec: 0.6614526286966046

	training f1: 0.7182244024222391

	Current Learning rate:  0.00034285714285714285



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.67s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.67s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.66s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.67s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.66s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.67s/it][A
 33%|███▎      | 14/42 [00:37<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.65s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.66s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.66s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.5820336611497969

	Validation acc: 0.6822916666666666

	Validation prec: 0.857424283555532

	Validation rec: 0.6822916666666666

	Validation f1: 0.735900410640581



  1%|          | 1/83 [00:20<28:13, 20.65s/it][A

loss: tensor(0.5718, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:00, 20.75s/it][A

tensor(0.4146, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:36, 20.70s/it][A

tensor(0.6873, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:19, 20.76s/it][A

tensor(0.6854, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<27:00, 20.77s/it][A

tensor(0.5422, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 6/83 [02:04<26:40, 20.79s/it]

loss: tensor(0.6731, device='cuda:0', grad_fn=<NllLossBackward>)


[A
  8%|▊         | 7/83 [02:25<26:15, 20.73s/it][A

loss: tensor(0.6726, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:54, 20.73s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:32, 20.71s/it][A

tensor(0.6771, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:27<25:11, 20.71s/it][A

tensor(0.4407, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:50, 20.70s/it][A

tensor(0.4889, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:28, 20.69s/it][A

tensor(0.5797, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 13/83 [04:29<24:09, 20.71s/it][A

loss: tensor(0.5839, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:50<23:50, 20.74s/it][A

tensor(0.5194, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:10<23:30, 20.74s/it]

loss: tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 19%|█▉        | 16/83 [05:31<23:07, 20.72s/it][A

tensor(0.6222, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:52<22:45, 20.69s/it][A

loss: tensor(0.5514, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:13<22:26, 20.72s/it][A

tensor(0.5217, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:33<22:01, 20.65s/it][A

loss: tensor(0.5568, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:54<21:40, 20.64s/it][A

loss: tensor(0.4876, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:14<21:17, 20.61s/it][A

loss: tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:35<20:57, 20.62s/it][A

loss: tensor(0.5973, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:55<20:34, 20.57s/it][A

loss: tensor(0.6009, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:16<20:14, 20.59s/it][A

tensor(0.6114, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:37<19:55, 20.62s/it][A

tensor(0.5575, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:57<19:36, 20.63s/it][A

tensor(0.5621, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:18<19:15, 20.63s/it][A

tensor(0.5556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:39<18:55, 20.64s/it][A

tensor(0.5167, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:59<18:34, 20.64s/it][A

tensor(0.6008, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:20<18:14, 20.64s/it][A

tensor(0.6402, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:41<17:54, 20.65s/it][A

tensor(0.5415, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:01<17:33, 20.66s/it][A

tensor(0.4630, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:22<17:12, 20.65s/it][A

tensor(0.5132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:43<16:52, 20.66s/it][A

tensor(0.6048, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:03<16:32, 20.68s/it][A

loss: tensor(0.6636, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:24<16:11, 20.67s/it][A

loss: tensor(0.5229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5484, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:45<15:51, 20.67s/it][A

loss: 


 46%|████▌     | 38/83 [13:05<15:30, 20.69s/it][A

tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:26<15:09, 20.67s/it][A

tensor(0.6188, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:47<14:49, 20.69s/it][A

tensor(0.6450, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:07<14:28, 20.69s/it][A

tensor(0.4999, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:28<14:08, 20.69s/it][A

loss: tensor(0.5797, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:49<13:48, 20.72s/it][A

tensor(0.6544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:10<13:27, 20.72s/it][A

tensor(0.7499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:30<13:06, 20.70s/it][A

tensor(0.5643, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:51<12:45, 20.69s/it][A

tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:12<12:25, 20.70s/it][A

loss: tensor(0.5214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:32<12:04, 20.69s/it][A

tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:53<11:43, 20.69s/it][A

tensor(0.5237, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:14<11:22, 20.68s/it][A

tensor(0.6452, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:34<11:02, 20.69s/it][A

tensor(0.5853, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:55<10:40, 20.67s/it][A

tensor(0.4696, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:16<10:20, 20.67s/it][A

tensor(0.5697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:36<09:58, 20.62s/it][A

tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:57<09:37, 20.63s/it][A

tensor(0.5808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:17<09:16, 20.62s/it][A

tensor(0.6539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:38<08:55, 20.59s/it][A

tensor(0.5000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:58<08:33, 20.56s/it][A

tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:19<08:13, 20.56s/it][A

tensor(0.7120, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:40<07:53, 20.60s/it][A

tensor(0.6157, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:00<07:33, 20.62s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:21<07:12, 20.62s/it][A

tensor(0.6921, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:42<06:52, 20.64s/it][A

tensor(0.6095, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:02<06:32, 20.66s/it][A

tensor(0.7242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:23<06:12, 20.72s/it][A

tensor(0.5868, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:44<05:52, 20.74s/it][A

tensor(0.7000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:05<05:32, 20.79s/it][A

tensor(0.5888, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:26<05:12, 20.84s/it][A

loss: tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [23:47<04:51, 20.84s/it][A

loss: tensor(0.5799, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:07<04:30, 20.84s/it][A

tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:28<04:09, 20.77s/it][A

tensor(0.5895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:49<03:48, 20.79s/it][A

tensor(0.5854, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:09<03:27, 20.72s/it][A

loss: tensor(0.5562, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:30<03:06, 20.68s/it][A

tensor(0.6826, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [25:51<02:45, 20.64s/it][A

loss: tensor(0.5570, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:11<02:24, 20.69s/it][A

tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:32<02:04, 20.69s/it][A

tensor(0.7632, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:53<01:43, 20.71s/it][A

tensor(0.5963, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:14<01:23, 20.77s/it][A

tensor(0.6821, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:35<01:02, 20.78s/it][A

tensor(0.6802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:55<00:41, 20.78s/it][A

tensor(0.4935, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:16<00:20, 20.78s/it][A

tensor(0.4869, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:23<00:00, 20.53s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7183, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.59674865833248

	train acc: 0.6633351588170865

	training prec: 0.8523513869844569

	training rec: 0.6633351588170865

	training f1: 0.7185386775156821

	Current Learning rate:  0.00031428571428571427



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.68s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.64s/it][A
 14%|█▍        | 6/42 [00:15<01:34,  2.63s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.64s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.65s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.66s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.66s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.5918919557616824

	Validation acc: 0.6922123015873016

	Validation prec: 0.8554124170162961

	Validation rec: 0.6922123015873016

	Validation f1: 0.746686810834158
loss: 


  1%|          | 1/83 [00:20<28:24, 20.78s/it][A

tensor(0.5830, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:04, 20.79s/it][A

tensor(0.6863, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:45, 20.82s/it][A

tensor(0.6089, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:23<27:22, 20.79s/it][A

loss: tensor(0.5659, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.4956, device='cuda:0', grad_fn=<NllLossBackward>)


  6%|▌         | 5/83 [01:43<26:52, 20.67s/it][A
  7%|▋         | 6/83 [02:04<26:27, 20.61s/it][A

loss: tensor(0.5915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:04, 20.58s/it][A

tensor(0.5497, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:42, 20.57s/it][A

tensor(0.5188, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:05<25:24, 20.60s/it][A

loss: tensor(0.5625, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:26<25:09, 20.67s/it][A

tensor(0.5974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:47, 20.66s/it][A

tensor(0.6501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:27, 20.67s/it][A

tensor(0.5404, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:28<24:06, 20.66s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:49<23:46, 20.67s/it][A

tensor(0.5615, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:24, 20.66s/it][A

tensor(0.5753, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:30<23:04, 20.67s/it][A

tensor(0.5686, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:51<22:45, 20.68s/it][A

loss: tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:12<22:23, 20.67s/it][A

loss: tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:32<22:02, 20.67s/it][A

loss: tensor(0.5494, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:53<21:39, 20.63s/it][A

loss: tensor(0.5323, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:13<21:18, 20.62s/it][A

tensor(0.5558, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:34<20:57, 20.61s/it][A

tensor(0.5631, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:55<20:36, 20.61s/it][A

loss: tensor(0.4903, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:15<20:16, 20.61s/it][A

loss: tensor(0.5110, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:36<19:55, 20.61s/it][A

tensor(0.6036, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:56<19:35, 20.63s/it][A

tensor(0.5156, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:17<19:19, 20.71s/it][A

loss: tensor(0.4432, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6413, device='cuda:0', grad_fn=<NllLossBackward>)


 34%|███▎      | 28/83 [09:38<19:00, 20.74s/it][A

loss: 


 35%|███▍      | 29/83 [09:59<18:41, 20.77s/it][A

tensor(0.5290, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:20<18:23, 20.81s/it][A

loss: tensor(0.5775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:41<18:02, 20.81s/it][A

tensor(0.6377, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:02<17:41, 20.81s/it][A

tensor(0.7514, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|███▉      | 33/83 [11:22<17:19, 20.79s/it][A

loss: tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:43<17:00, 20.82s/it][A

loss: tensor(0.7487, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:04<16:41, 20.86s/it][A

tensor(0.6175, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:25<16:19, 20.84s/it][A

tensor(0.6075, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:46<15:57, 20.82s/it][A

tensor(0.6148, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:07<15:37, 20.82s/it][A

tensor(0.6149, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:27<15:14, 20.78s/it][A

tensor(0.6807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:48<14:52, 20.76s/it][A

tensor(0.6233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:09<14:30, 20.72s/it][A

tensor(0.6168, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:29<14:08, 20.69s/it][A

tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:50<13:46, 20.66s/it][A

tensor(0.5640, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:10<13:24, 20.63s/it][A

tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:31<13:03, 20.63s/it][A

tensor(0.4818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:52<12:43, 20.62s/it][A

tensor(0.5520, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:12<12:20, 20.58s/it][A

tensor(0.7158, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:33<12:01, 20.60s/it][A

tensor(0.5617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:53<11:41, 20.64s/it][A

tensor(0.5741, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:14<11:22, 20.68s/it][A

tensor(0.6180, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:35<11:02, 20.69s/it][A

tensor(0.4836, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [17:56<10:41, 20.70s/it][A

loss: tensor(0.6230, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:16<10:20, 20.69s/it][A

loss: tensor(0.6254, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:37<10:00, 20.70s/it][A

tensor(0.5576, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:58<09:39, 20.71s/it][A

tensor(0.5123, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:18<09:19, 20.72s/it][A

loss: tensor(0.6564, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:39<08:57, 20.69s/it][A

tensor(0.6138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:00<08:36, 20.67s/it][A

tensor(0.5324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:20<08:16, 20.67s/it][A

tensor(0.5409, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:41<07:55, 20.67s/it][A

tensor(0.5613, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [21:02<07:34, 20.66s/it][A

loss: tensor(0.5637, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:22<07:13, 20.63s/it][A

loss: tensor(0.7058, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:43<06:52, 20.61s/it][A

loss: tensor(0.7203, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:03<06:31, 20.59s/it][A

loss: tensor(0.6124, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:24<06:10, 20.59s/it][A

loss: tensor(0.4649, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [22:45<05:50, 20.60s/it][A

loss: tensor(0.5710, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:05<05:29, 20.62s/it][A

loss: tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:26<05:09, 20.67s/it][A

tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:47<04:49, 20.69s/it][A

tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:07<04:28, 20.68s/it][A

tensor(0.6542, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:28<04:08, 20.71s/it][A

tensor(0.5726, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:49<03:48, 20.77s/it][A

tensor(0.5851, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5078, device='cuda:0', grad_fn=<NllLossBackward>)


 88%|████████▊ | 73/83 [25:10<03:27, 20.77s/it][A

loss: 


 89%|████████▉ | 74/83 [25:31<03:07, 20.84s/it][A

tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:52<02:47, 20.88s/it][A

tensor(0.5728, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:12<02:25, 20.78s/it][A

tensor(0.5150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:33<02:04, 20.75s/it][A

tensor(0.5516, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:54<01:43, 20.68s/it][A

tensor(0.6223, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:14<01:22, 20.65s/it][A

tensor(0.5516, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:35<01:01, 20.60s/it][A

loss: tensor(0.7142, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:55<00:41, 20.61s/it][A

tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7801, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:16<00:20, 20.64s/it][A
100%|██████████| 83/83 [28:23<00:00, 20.53s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7606, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5959206029593226

	train acc: 0.6693934830230012

	training prec: 0.8513734685751527

	training rec: 0.6693934830230012

	training f1: 0.7247579901184934

	Current Learning rate:  0.0002857142857142857



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.66s/it][A
  7%|▋         | 3/42 [00:07<01:44,  2.67s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.66s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.66s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.65s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:04,  2.67s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.66s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.5955498168865839

	Validation acc: 0.7093253968253969

	Validation prec: 0.8531579449659417

	Validation rec: 0.7093253968253969

	Validation f1: 0.7592464585412466
loss: 


  1%|          | 1/83 [00:20<28:17, 20.70s/it][A

tensor(0.6233, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:57, 20.71s/it][A

tensor(0.5597, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:34, 20.68s/it][A

tensor(0.5652, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 4/83 [01:22<27:14, 20.69s/it][A

loss: tensor(0.5393, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:54, 20.70s/it][A

tensor(0.5572, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:35, 20.73s/it][A

tensor(0.5662, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:12, 20.68s/it][A

tensor(0.5507, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:45<25:47, 20.64s/it][A

loss: tensor(0.5899, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:26, 20.63s/it][A

tensor(0.5904, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:26<25:04, 20.61s/it][A

loss: tensor(0.6321, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:43, 20.60s/it][A

tensor(0.5266, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:07<24:23, 20.62s/it][A

tensor(0.5324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:28<24:08, 20.70s/it][A

tensor(0.6334, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:49<23:47, 20.69s/it][A

loss: tensor(0.5958, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:09<23:23, 20.64s/it][A

tensor(0.5909, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:30<23:01, 20.62s/it][A

tensor(0.6709, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:50<22:38, 20.59s/it][A

loss: tensor(0.5099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:11<22:17, 20.57s/it][A

tensor(0.5536, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:32<21:56, 20.57s/it]

loss: tensor(0.8626, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 24%|██▍       | 20/83 [06:52<21:35, 20.56s/it][A

tensor(0.7201, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:13<21:12, 20.53s/it][A

loss: tensor(0.5957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:33<20:49, 20.48s/it][A

tensor(0.4761, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:53<20:27, 20.45s/it][A

loss: tensor(0.5551, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:14<20:06, 20.46s/it][A

loss: tensor(0.5601, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:34<19:46, 20.45s/it][A

tensor(0.7081, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [08:55<19:27, 20.48s/it][A

tensor(0.7275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:15<19:06, 20.48s/it][A

tensor(0.5687, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:36<18:46, 20.49s/it][A

loss: tensor(0.6994, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [09:56<18:26, 20.49s/it][A

loss: tensor(0.6305, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:17<18:06, 20.50s/it][A

tensor(0.5684, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:37<17:46, 20.50s/it][A

tensor(0.6662, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [10:58<17:25, 20.50s/it][A

tensor(0.5655, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:18<17:03, 20.47s/it][A

tensor(0.6502, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:39<16:42, 20.46s/it][A

tensor(0.5776, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [11:59<16:21, 20.45s/it][A

tensor(0.4593, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:20<16:01, 20.47s/it][A

tensor(0.5514, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6580, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:40<15:45, 20.55s/it][A

loss: 


 46%|████▌     | 38/83 [13:01<15:28, 20.63s/it][A

tensor(0.5058, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:22<15:08, 20.66s/it][A

loss: tensor(0.5803, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:43<14:49, 20.69s/it][A

tensor(0.6726, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:03<14:28, 20.68s/it][A

tensor(0.5913, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:24<14:06, 20.65s/it][A

loss: tensor(0.5360, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:44<13:45, 20.65s/it][A

loss: tensor(0.6621, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6196, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:05<13:25, 20.64s/it][A

loss: 


 54%|█████▍    | 45/83 [15:26<13:04, 20.64s/it][A

tensor(0.5372, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:46<12:43, 20.64s/it][A

tensor(0.5604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:07<12:23, 20.64s/it][A

tensor(0.5499, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:28<12:02, 20.65s/it][A

tensor(0.5269, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:48<11:42, 20.65s/it][A

tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:09<11:21, 20.66s/it][A

tensor(0.6191, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:30<11:01, 20.66s/it][A

tensor(0.7824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:50<10:40, 20.66s/it][A

tensor(0.5600, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:11<10:19, 20.65s/it][A

tensor(0.5483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:32<10:00, 20.69s/it][A

tensor(0.5733, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [18:52<09:38, 20.65s/it][A

loss: tensor(0.6102, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:13<09:17, 20.64s/it][A

loss: tensor(0.4823, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:34<08:56, 20.64s/it][A

loss: tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [19:54<08:35, 20.63s/it][A

loss: tensor(0.5642, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:15<08:16, 20.67s/it][A

loss: tensor(0.6740, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:36<07:56, 20.71s/it][A

tensor(0.7113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [20:57<07:37, 20.81s/it][A

tensor(0.5498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:18<07:16, 20.78s/it][A

tensor(0.5977, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:38<06:55, 20.79s/it][A

tensor(0.5586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:59<06:33, 20.71s/it][A

tensor(0.4955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.4936, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:20<06:12, 20.71s/it][A
 80%|███████▉  | 66/83 [22:40<05:51, 20.70s/it][A

loss: tensor(0.5779, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:01<05:30, 20.69s/it][A

loss: tensor(0.6658, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:22<05:10, 20.67s/it][A

loss: tensor(0.6825, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:42<04:49, 20.68s/it][A

tensor(0.5966, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:03<04:28, 20.66s/it][A

tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:24<04:08, 20.68s/it][A

tensor(0.6570, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:44<03:47, 20.70s/it][A

tensor(0.6215, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:05<03:26, 20.68s/it][A

tensor(0.5433, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:26<03:06, 20.67s/it][A

tensor(0.6540, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:46<02:45, 20.67s/it][A

tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:07<02:24, 20.68s/it][A

tensor(0.5358, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:28<02:04, 20.71s/it][A

tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:49<01:43, 20.77s/it][A

tensor(0.5653, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:09<01:23, 20.75s/it][A

loss: tensor(0.5507, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:30<01:02, 20.79s/it][A

tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:51<00:41, 20.77s/it][A

tensor(0.5292, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:12<00:20, 20.75s/it][A

tensor(0.5457, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:19<00:00, 20.47s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4430, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.593635664646884

	train acc: 0.660288882803943

	training prec: 0.8562571831968404

	training rec: 0.660288882803943

	training f1: 0.7169389335808405

	Current Learning rate:  0.0002571428571428571



  2%|▏         | 1/42 [00:02<01:49,  2.67s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.64s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.64s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.68s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.66s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.66s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.66s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.65s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.65s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.5929943174123764

	Validation acc: 0.6495535714285714

	Validation prec: 0.8579916908259323

	Validation rec: 0.6495535714285714

	Validation f1: 0.7109546307710726
loss: 


  1%|          | 1/83 [00:20<28:19, 20.72s/it][A

tensor(0.5616, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:58, 20.72s/it][A

tensor(0.5396, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:36, 20.70s/it][A

tensor(0.5260, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:14, 20.69s/it][A

tensor(0.5564, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:52, 20.67s/it][A

tensor(0.4549, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:30, 20.66s/it][A

tensor(0.5231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:09, 20.66s/it][A

tensor(0.5991, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:50, 20.67s/it][A

tensor(0.5751, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:06<25:31, 20.70s/it][A

loss: tensor(0.5090, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:26<25:09, 20.68s/it][A

loss: tensor(0.7152, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:51, 20.72s/it][A

tensor(0.8065, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:32, 20.74s/it][A

tensor(0.7097, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:09, 20.71s/it][A

tensor(0.6412, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:49<23:51, 20.75s/it][A

loss: tensor(0.6369, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:30, 20.74s/it][A

tensor(0.7826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:31<23:08, 20.73s/it][A

tensor(0.6930, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:51<22:45, 20.70s/it][A

tensor(0.5649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:24, 20.69s/it][A

tensor(0.6045, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:33<22:04, 20.69s/it][A

tensor(0.6020, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:54<21:44, 20.71s/it][A

tensor(0.6156, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:14<21:23, 20.70s/it][A

loss: tensor(0.5979, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:35<21:01, 20.68s/it][A

tensor(0.5955, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:56<20:40, 20.67s/it][A

loss: tensor(0.6716, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:16<20:17, 20.64s/it][A

tensor(0.6555, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:37<19:54, 20.60s/it][A

loss: tensor(0.6947, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [08:57<19:32, 20.57s/it][A

loss: tensor(0.5593, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:18<19:11, 20.56s/it][A

tensor(0.6340, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:38<18:50, 20.56s/it][A

tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [09:59<18:30, 20.57s/it][A

tensor(0.7455, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:19<18:11, 20.59s/it][A

tensor(0.5093, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:40<17:50, 20.58s/it][A

loss: tensor(0.5129, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:01<17:30, 20.60s/it][A

tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:21<17:10, 20.60s/it][A

tensor(0.5446, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:42<16:50, 20.62s/it][A

tensor(0.6282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:03<16:29, 20.62s/it][A

tensor(0.5840, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:23<16:09, 20.63s/it][A

tensor(0.4426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:44<15:49, 20.65s/it][A

tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:05<15:30, 20.67s/it][A

tensor(0.4856, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:25<15:09, 20.66s/it][A

tensor(0.5647, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:46<14:49, 20.68s/it][A

loss: tensor(0.5182, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 41/83 [14:07<14:28, 20.68s/it][A

loss: tensor(0.4968, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:27<14:07, 20.67s/it][A

loss: tensor(0.5396, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:48<13:47, 20.68s/it][A

tensor(0.4829, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:09<13:26, 20.67s/it][A

tensor(0.6298, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:29<13:06, 20.71s/it][A

tensor(0.4899, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:50<12:45, 20.69s/it][A

tensor(0.5483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:11<12:24, 20.68s/it][A

tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:32<12:05, 20.72s/it][A

tensor(0.5108, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [16:52<11:44, 20.72s/it][A

loss: tensor(0.5538, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:13<11:23, 20.70s/it][A

tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:33<11:00, 20.65s/it][A

tensor(0.5445, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:54<10:39, 20.62s/it][A

tensor(0.5817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6536, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:14<10:17, 20.58s/it][A
 65%|██████▌   | 54/83 [18:35<09:56, 20.55s/it][A

loss: tensor(0.6294, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:55<09:35, 20.54s/it][A

tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:16<09:14, 20.56s/it][A

loss: tensor(0.5951, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:37<08:53, 20.53s/it][A

loss: tensor(0.5278, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [19:57<08:35, 20.60s/it][A

tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:18<08:15, 20.66s/it][A

tensor(0.5047, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:39<07:56, 20.71s/it][A

loss: tensor(0.7738, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:00<07:34, 20.68s/it][A

tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:20<07:13, 20.66s/it][A

tensor(0.6273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:41<06:52, 20.64s/it][A

tensor(0.6089, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:01<06:31, 20.63s/it][A

loss: tensor(0.5904, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:22<06:11, 20.63s/it][A

tensor(0.4619, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:43<05:50, 20.63s/it][A

tensor(0.6110, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:03<05:30, 20.63s/it][A

tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:24<05:10, 20.67s/it][A

loss: tensor(0.6137, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:45<04:50, 20.77s/it][A

tensor(0.7746, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:06<04:30, 20.81s/it][A

tensor(0.6260, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:27<04:09, 20.83s/it][A

tensor(0.5364, device='cuda:0', grad_fn=<NllLossBackward>)



 87%|████████▋ | 72/83 [24:47<03:48, 20.80s/it][A

loss: tensor(0.5101, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:08<03:27, 20.78s/it][A

tensor(0.7185, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:29<03:07, 20.81s/it][A

tensor(0.5527, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [25:50<02:46, 20.80s/it][A

loss: tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:10<02:25, 20.74s/it][A

tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:31<02:04, 20.71s/it][A

tensor(0.5403, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:52<01:43, 20.68s/it][A

tensor(0.5192, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:12<01:22, 20.69s/it][A

tensor(0.5872, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:33<01:02, 20.73s/it][A

tensor(0.5293, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [27:54<00:41, 20.80s/it][A

loss: tensor(0.7028, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:15<00:20, 20.78s/it][A

tensor(0.6641, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:22<00:00, 20.51s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4723, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5952795129942606

	train acc: 0.6744763143483024

	training prec: 0.854743665608158

	training rec: 0.6744763143483024

	training f1: 0.7287303641604745

	Current Learning rate:  0.00022857142857142857



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.68s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.67s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.66s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.67s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.65s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.67s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.67s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.66s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.67s/it][A
 40%|████      | 17/42 [00:45<01:07,  2.68s/it][A
 43%|████▎     | 18/42 [00:47<01:04,  2.67s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.5923944690397808

	Validation acc: 0.6502976190476191

	Validation prec: 0.8645455092749743

	Validation rec: 0.6502976190476191

	Validation f1: 0.7142176907249624
loss: 


  1%|          | 1/83 [00:20<28:13, 20.65s/it][A

tensor(0.4789, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:41<27:49, 20.61s/it][A

loss: tensor(0.5907, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:34, 20.69s/it][A

tensor(0.6405, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:12, 20.66s/it][A

tensor(0.5826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5402, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:43<26:48, 20.63s/it][A

loss: 


  7%|▋         | 6/83 [02:03<26:28, 20.63s/it][A

tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:06, 20.62s/it][A

tensor(0.5871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:48, 20.64s/it][A

tensor(0.5689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:05<25:28, 20.66s/it][A

tensor(0.4439, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5155, device='cuda:0', grad_fn=<NllLossBackward>)


 12%|█▏        | 10/83 [03:26<25:05, 20.62s/it][A

loss: 


 13%|█▎        | 11/83 [03:46<24:43, 20.61s/it][A

tensor(0.5739, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:07<24:20, 20.58s/it][A

loss: tensor(0.5128, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:27<23:59, 20.56s/it][A

tensor(0.6179, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:48<23:37, 20.54s/it][A

tensor(0.5557, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:08<23:16, 20.54s/it][A

tensor(0.7749, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:29<22:55, 20.53s/it][A

loss: tensor(0.5337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:50<22:37, 20.56s/it][A

tensor(0.6382, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:10<22:18, 20.59s/it][A

tensor(0.6463, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:31<22:01, 20.65s/it][A

tensor(0.6038, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:52<21:45, 20.73s/it][A

tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:13<21:27, 20.77s/it][A

tensor(0.5833, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:34<21:08, 20.80s/it][A

tensor(0.5180, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:55<20:48, 20.81s/it][A

tensor(0.5519, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:15<20:26, 20.79s/it][A

loss: tensor(0.6229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:36<20:03, 20.74s/it][A

tensor(0.5365, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [08:56<19:38, 20.68s/it][A

loss: tensor(0.6289, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:17<19:15, 20.64s/it][A

tensor(0.5775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6536, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:38<18:53, 20.61s/it][A

loss: 


 35%|███▍      | 29/83 [09:58<18:33, 20.62s/it][A

tensor(0.5103, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:19<18:19, 20.75s/it][A

tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:40<18:01, 20.80s/it][A

tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:01<17:44, 20.87s/it][A

tensor(0.4866, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:22<17:23, 20.87s/it][A

tensor(0.6018, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:43<17:02, 20.86s/it][A

tensor(0.6509, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:04<16:39, 20.82s/it][A

tensor(0.5464, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:24<16:19, 20.84s/it][A

tensor(0.6229, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:45<15:59, 20.86s/it][A

tensor(0.5455, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:06<15:39, 20.87s/it][A

tensor(0.6617, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:27<15:16, 20.83s/it][A

tensor(0.5782, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:48<14:55, 20.82s/it][A

loss: tensor(0.5355, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:09<14:34, 20.81s/it][A

tensor(0.5697, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:30<14:15, 20.87s/it][A

tensor(0.4785, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:50<13:53, 20.83s/it][A

tensor(0.7304, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:11<13:31, 20.80s/it][A

loss: tensor(0.6337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:32<13:09, 20.79s/it][A

tensor(0.4938, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:53<12:51, 20.85s/it][A

tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:14<12:31, 20.87s/it][A

tensor(0.6659, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:34<12:08, 20.82s/it][A

tensor(0.6511, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:54<11:38, 20.55s/it][A

tensor(0.5842, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:15<11:15, 20.46s/it][A

tensor(0.6058, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 61%|██████▏   | 51/83 [17:36<11:01, 20.66s/it][A

tensor(0.5915, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:57<10:42, 20.73s/it][A

tensor(0.6527, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:18<10:24, 20.83s/it][A

tensor(0.6519, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:39<10:04, 20.86s/it][A

tensor(0.7268, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:00<09:45, 20.90s/it][A

tensor(0.5109, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:21<09:24, 20.91s/it][A

tensor(0.5534, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:41<09:03, 20.90s/it][A

tensor(0.6515, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:02<08:43, 20.93s/it][A

tensor(0.6498, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:24<08:24, 21.03s/it][A

tensor(0.4723, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:45<08:03, 21.01s/it][A

tensor(0.6418, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:06<07:41, 20.98s/it][A

tensor(0.5723, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:26<07:20, 20.96s/it][A

tensor(0.5980, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:48<06:59, 20.99s/it][A

tensor(0.5196, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:08<06:36, 20.89s/it][A

tensor(0.6738, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:29<06:15, 20.89s/it][A

tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:50<05:54, 20.87s/it][A

tensor(0.5119, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:11<05:33, 20.82s/it][A

loss: tensor(0.7213, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 68/83 [23:31<05:10, 20.70s/it]

loss: tensor(0.5324, device='cuda:0', grad_fn=<NllLossBackward>)


[A
 83%|████████▎ | 69/83 [23:52<04:48, 20.64s/it][A

loss: tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:12<04:27, 20.57s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6588, device='cuda:0', grad_fn=<NllLossBackward>)


 86%|████████▌ | 71/83 [24:32<04:06, 20.55s/it][A

loss: 


 87%|████████▋ | 72/83 [24:53<03:46, 20.62s/it][A

tensor(0.4935, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:14<03:26, 20.69s/it][A

tensor(0.5634, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:35<03:06, 20.74s/it][A

tensor(0.6296, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:56<02:46, 20.79s/it][A

tensor(0.5909, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [26:17<02:25, 20.82s/it][A

loss: tensor(0.5891, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:37<02:04, 20.80s/it][A

tensor(0.6877, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:58<01:43, 20.77s/it][A

tensor(0.5302, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:19<01:23, 20.78s/it][A

tensor(0.5544, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:40<01:02, 20.78s/it][A

loss: tensor(0.6282, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:00<00:41, 20.76s/it][A

tensor(0.5184, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.4909, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:21<00:20, 20.78s/it][A
100%|██████████| 83/83 [28:28<00:00, 20.59s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6687, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5913452111094831

	train acc: 0.6628388554216867

	training prec: 0.8533071012676651

	training rec: 0.6628388554216867

	training f1: 0.7193099227649823

	Current Learning rate:  0.0002



  2%|▏         | 1/42 [00:02<01:47,  2.63s/it][A
  5%|▍         | 2/42 [00:05<01:46,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.65s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.65s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.67s/it][A
 21%|██▏       | 9/42 [00:23<01:28,  2.67s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.66s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.68s/it][A
 29%|██▊       | 12/42 [00:31<01:20,  2.68s/it][A
 31%|███       | 13/42 [00:34<01:18,  2.70s/it][A
 33%|███▎      | 14/42 [00:37<01:15,  2.70s/it][A
 36%|███▌      | 15/42 [00:40<01:12,  2.69s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.67s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.67s/it][A
 43%|████▎     | 18/42 [00:48<01:04,  2.68s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.66s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.6011298313027337

	Validation acc: 0.6153273809523809

	Validation prec: 0.868726851520366

	Validation rec: 0.6153273809523809

	Validation f1: 0.6831840349493308
loss: 


  1%|          | 1/83 [00:20<28:24, 20.78s/it][A

tensor(0.6261, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<28:01, 20.76s/it][A

tensor(0.5405, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:34, 20.68s/it][A

tensor(0.5951, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:11, 20.66s/it][A

tensor(0.6317, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:51, 20.66s/it][A

tensor(0.6066, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:31, 20.67s/it][A

tensor(0.5388, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5725, device='cuda:0', grad_fn=<NllLossBackward>)



  8%|▊         | 7/83 [02:24<26:12, 20.69s/it][A

loss: 


 10%|▉         | 8/83 [02:45<25:49, 20.66s/it][A

tensor(0.4595, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:28, 20.65s/it][A

tensor(0.5779, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:26<25:07, 20.66s/it][A

tensor(0.5890, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:47<24:47, 20.66s/it][A

tensor(0.4689, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:26, 20.66s/it][A

tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:28<24:08, 20.69s/it][A

tensor(0.4784, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [04:49<23:45, 20.66s/it][A

loss: tensor(0.5525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:10<23:28, 20.71s/it][A

tensor(0.5113, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:30<23:04, 20.67s/it][A

tensor(0.6250, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:51<22:44, 20.67s/it][A

tensor(0.6963, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:24, 20.68s/it][A

tensor(0.5607, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:32<22:03, 20.68s/it][A

tensor(0.8634, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:53<21:41, 20.67s/it][A

tensor(0.5801, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:14<21:20, 20.66s/it][A

tensor(0.6047, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:34<21:02, 20.69s/it][A

loss: tensor(0.4196, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:55<20:40, 20.67s/it][A

loss: tensor(0.5930, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:16<20:19, 20.68s/it][A

loss: tensor(0.5944, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:36<19:56, 20.63s/it][A

loss: tensor(0.5661, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [08:57<19:33, 20.58s/it][A

loss: tensor(0.6379, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:17<19:11, 20.57s/it][A

tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:38<18:53, 20.60s/it][A

tensor(0.4994, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [09:59<18:32, 20.60s/it]

loss: tensor(0.5991, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 36%|███▌      | 30/83 [10:19<18:10, 20.58s/it][A

tensor(0.5504, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:40<17:51, 20.61s/it][A

tensor(0.6583, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:00<17:32, 20.65s/it][A

loss: tensor(0.5879, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:21<17:14, 20.68s/it][A

tensor(0.7165, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:42<16:54, 20.71s/it][A

loss: tensor(0.7481, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:03<16:32, 20.69s/it][A

loss: tensor(0.5523, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:23<16:10, 20.65s/it][A

tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5483, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:44<15:51, 20.68s/it][A

loss: 


 46%|████▌     | 38/83 [13:05<15:30, 20.67s/it][A

tensor(0.5685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:25<15:09, 20.66s/it][A

tensor(0.5974, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:46<14:48, 20.66s/it][A

tensor(0.4975, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:07<14:29, 20.71s/it][A

tensor(0.5824, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:27<14:09, 20.71s/it][A

tensor(0.5871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:48<13:45, 20.63s/it][A

tensor(0.7089, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:08<13:22, 20.57s/it][A

tensor(0.5030, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5266, device='cuda:0', grad_fn=<NllLossBackward>)


 54%|█████▍    | 45/83 [15:29<13:01, 20.56s/it][A

loss: 


 55%|█████▌    | 46/83 [15:49<12:39, 20.52s/it][A

tensor(0.7052, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:10<12:17, 20.48s/it][A

tensor(0.5214, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:31<12:01, 20.61s/it][A

tensor(0.4924, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:53<11:54, 21.01s/it][A

tensor(0.5072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:15<11:43, 21.30s/it][A

tensor(0.5798, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6188, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:37<11:29, 21.54s/it][A

loss: 


 63%|██████▎   | 52/83 [17:58<11:08, 21.56s/it][A

tensor(0.5695, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:20<10:50, 21.67s/it][A

tensor(0.5346, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:42<10:29, 21.70s/it][A

tensor(0.6288, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:04<10:07, 21.71s/it][A

tensor(0.5947, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:26<09:50, 21.87s/it][A

tensor(0.5405, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:48<09:30, 21.93s/it][A

tensor(0.6859, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:10<09:12, 22.10s/it][A

tensor(0.5428, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:33<08:54, 22.27s/it][A

tensor(0.5207, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:55<08:31, 22.25s/it][A

tensor(0.6000, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:18<08:12, 22.39s/it][A

tensor(0.6276, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:41<07:50, 22.42s/it][A

tensor(0.7255, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:03<07:27, 22.37s/it][A

tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:25<07:06, 22.46s/it][A

tensor(0.5080, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:48<06:43, 22.41s/it][A

tensor(0.5483, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [23:10<06:20, 22.39s/it][A

tensor(0.6261, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:32<05:58, 22.38s/it][A

loss: tensor(0.7235, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:54<05:34, 22.28s/it][A

tensor(0.5548, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:17<05:12, 22.30s/it][A

tensor(0.5205, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:39<04:48, 22.21s/it][A

loss: tensor(0.4920, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [25:01<04:27, 22.32s/it][A

tensor(0.5035, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [25:23<04:02, 22.03s/it][A

tensor(0.6485, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:45<03:39, 21.97s/it][A

tensor(0.5384, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [26:07<03:17, 21.96s/it][A

tensor(0.5951, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [26:28<02:55, 21.92s/it][A

loss: tensor(0.5482, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:50<02:31, 21.70s/it][A

tensor(0.6324, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [27:11<02:09, 21.58s/it][A

tensor(0.5942, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:33<01:48, 21.77s/it][A

tensor(0.5876, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:55<01:26, 21.69s/it][A

tensor(0.5457, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [28:16<01:05, 21.75s/it][A

tensor(0.5842, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:39<00:43, 21.92s/it][A

tensor(0.6681, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [29:01<00:21, 21.88s/it][A

tensor(0.5212, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [29:08<00:00, 21.07s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4896, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5835111823426672

	train acc: 0.6783953997809419

	training prec: 0.8626686867009874

	training rec: 0.6783953997809419

	training f1: 0.7336510261316853

	Current Learning rate:  0.00017142857142857143



  2%|▏         | 1/42 [00:03<02:05,  3.06s/it][A
  5%|▍         | 2/42 [00:05<01:52,  2.81s/it][A
  7%|▋         | 3/42 [00:08<01:48,  2.78s/it][A
 10%|▉         | 4/42 [00:11<01:47,  2.82s/it][A
 12%|█▏        | 5/42 [00:14<01:42,  2.78s/it][A
 14%|█▍        | 6/42 [00:16<01:39,  2.77s/it][A
 17%|█▋        | 7/42 [00:19<01:39,  2.84s/it][A
 19%|█▉        | 8/42 [00:22<01:38,  2.91s/it][A
 21%|██▏       | 9/42 [00:25<01:35,  2.88s/it][A
 24%|██▍       | 10/42 [00:28<01:31,  2.86s/it][A
 26%|██▌       | 11/42 [00:31<01:26,  2.78s/it][A
 29%|██▊       | 12/42 [00:34<01:25,  2.84s/it][A
 31%|███       | 13/42 [00:37<01:24,  2.91s/it][A
 33%|███▎      | 14/42 [00:40<01:21,  2.91s/it][A
 36%|███▌      | 15/42 [00:42<01:18,  2.92s/it][A
 38%|███▊      | 16/42 [00:45<01:15,  2.89s/it][A
 40%|████      | 17/42 [00:48<01:12,  2.89s/it][A
 43%|████▎     | 18/42 [00:51<01:09,  2.91s/it][A
 45%|████▌     | 19/42 [00:54<01:07,  2.92s/it][A
 48%|████▊     | 20/42 [00:57<01:05,  2


	Validation loss: 0.6003091562361944

	Validation acc: 0.661954365079365

	Validation prec: 0.8518881999431199

	Validation rec: 0.661954365079365

	Validation f1: 0.7216652802988545
loss: 


  1%|          | 1/83 [00:22<30:06, 22.04s/it][A

tensor(0.4957, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:44<30:11, 22.36s/it][A

tensor(0.6224, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:07<30:17, 22.71s/it][A

tensor(0.5971, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:29<29:33, 22.45s/it][A

tensor(0.5200, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:52<29:04, 22.37s/it][A

tensor(0.7140, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:13<28:17, 22.05s/it][A

tensor(0.5466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:34<27:42, 21.87s/it][A

tensor(0.5568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:56<27:08, 21.72s/it][A

tensor(0.4045, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:18<26:47, 21.73s/it][A

tensor(0.6386, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:40<26:41, 21.94s/it][A

tensor(0.6888, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [04:02<26:24, 22.01s/it][A

tensor(0.6620, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:24<25:58, 21.95s/it][A

loss: tensor(0.6172, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:47<25:49, 22.13s/it][A

tensor(0.6061, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 14/83 [05:08<25:15, 21.97s/it][A

loss: tensor(0.5616, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:29<24:30, 21.62s/it][A

tensor(0.6465, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:50<23:53, 21.39s/it][A

tensor(0.7079, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [06:11<23:17, 21.18s/it][A

tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:31<22:49, 21.07s/it][A

tensor(0.5323, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:52<22:22, 20.98s/it][A

tensor(0.5334, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [07:13<21:57, 20.91s/it][A

tensor(0.6130, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:34<21:34, 20.88s/it][A

tensor(0.4500, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:54<21:12, 20.86s/it][A

loss: tensor(0.5553, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [08:15<20:45, 20.76s/it][A

loss: tensor(0.6750, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:36<20:21, 20.70s/it][A

loss: tensor(0.5459, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:56<19:59, 20.69s/it][A

loss: tensor(0.5681, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 31%|███▏      | 26/83 [09:17<19:38, 20.68s/it][A

tensor(0.6209, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:37<19:16, 20.65s/it][A

tensor(0.7117, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:58<18:58, 20.69s/it][A

loss: tensor(0.5714, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:19<18:37, 20.70s/it][A

tensor(0.6106, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:40<18:17, 20.71s/it][A

tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [11:01<17:58, 20.75s/it][A

tensor(0.6640, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:21<17:37, 20.74s/it][A

tensor(0.6085, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:42<17:17, 20.74s/it][A

tensor(0.6568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [12:03<16:55, 20.73s/it][A

tensor(0.5244, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:24<16:37, 20.78s/it][A

tensor(0.5027, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7164, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:44<16:15, 20.76s/it][A

loss: 


 45%|████▍     | 37/83 [13:05<15:54, 20.76s/it][A

tensor(0.5084, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:26<15:33, 20.75s/it][A

tensor(0.6219, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:47<15:14, 20.77s/it][A

tensor(0.6759, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [14:07<14:52, 20.76s/it][A

loss: tensor(0.5132, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:28<14:32, 20.78s/it][A

tensor(0.7291, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:49<14:10, 20.74s/it][A

tensor(0.7820, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [15:10<13:48, 20.72s/it][A

tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:30<13:26, 20.67s/it][A

tensor(0.5894, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:51<13:03, 20.63s/it][A

tensor(0.5274, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [16:11<12:41, 20.59s/it][A

loss: tensor(0.6177, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:32<12:21, 20.60s/it][A

tensor(0.7334, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:52<12:00, 20.59s/it][A

tensor(0.4797, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [17:13<11:40, 20.59s/it][A

tensor(0.5333, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:33<11:19, 20.60s/it][A

tensor(0.4358, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:54<11:00, 20.64s/it]

loss: tensor(0.6012, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 63%|██████▎   | 52/83 [18:15<10:40, 20.66s/it][A

tensor(0.5265, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:35<10:18, 20.63s/it][A

loss: tensor(0.5861, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:56<09:56, 20.57s/it][A

loss: tensor(0.6191, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [19:16<09:34, 20.51s/it][A

loss: tensor(0.5077, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:37<09:13, 20.51s/it][A

loss: tensor(0.5602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:57<08:52, 20.50s/it][A

tensor(0.6441, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:18<08:32, 20.51s/it]

loss: tensor(0.5612, device='cuda:0', grad_fn=<NllLossBackward>)


[A
 71%|███████   | 59/83 [20:38<08:13, 20.55s/it][A

loss: tensor(0.5759, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:59<07:53, 20.58s/it][A

loss: tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:20<07:32, 20.56s/it][A

tensor(0.6395, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:40<07:11, 20.57s/it][A

tensor(0.5871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [22:01<06:52, 20.64s/it][A

tensor(0.6204, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:22<06:32, 20.66s/it][A

tensor(0.4821, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:42<06:11, 20.62s/it][A

loss: tensor(0.5286, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5811, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [23:03<05:50, 20.62s/it][A

loss: tensor(0.6182, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [23:24<05:30, 20.65s/it][A
 82%|████████▏ | 68/83 [23:44<05:09, 20.65s/it][A

loss: tensor(0.5934, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [24:05<04:48, 20.62s/it][A

tensor(0.6217, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5185, device='cuda:0', grad_fn=<NllLossBackward>)


 84%|████████▍ | 70/83 [24:25<04:27, 20.60s/it][A

loss: tensor(0.5865, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 71/83 [24:46<04:07, 20.62s/it][A

loss: 


 87%|████████▋ | 72/83 [25:07<03:47, 20.68s/it][A

tensor(0.5250, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:28<03:26, 20.68s/it][A

tensor(0.5376, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:48<03:06, 20.73s/it][A

tensor(0.5955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [26:11<02:50, 21.35s/it][A

tensor(0.5852, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:34<02:32, 21.76s/it][A

tensor(0.6065, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:56<02:11, 21.88s/it][A

tensor(0.5708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [27:19<01:50, 22.09s/it][A

tensor(0.6036, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 95%|█████████▌| 79/83 [27:40<01:27, 21.77s/it][A

tensor(0.4654, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [28:00<01:04, 21.47s/it][A

loss: tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [28:23<00:43, 21.68s/it][A

tensor(0.6242, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:44<00:21, 21.69s/it][A

tensor(0.7062, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:52<00:00, 20.88s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7150, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5933370241917759

	train acc: 0.6635405257393209

	training prec: 0.8515353422761763

	training rec: 0.6635405257393209

	training f1: 0.7206003455998731

	Current Learning rate:  0.00014285714285714284



  2%|▏         | 1/42 [00:02<01:54,  2.79s/it][A
  5%|▍         | 2/42 [00:05<01:51,  2.80s/it][A
  7%|▋         | 3/42 [00:08<01:50,  2.83s/it][A
 10%|▉         | 4/42 [00:11<01:49,  2.89s/it][A
 12%|█▏        | 5/42 [00:14<01:48,  2.93s/it][A
 14%|█▍        | 6/42 [00:17<01:42,  2.85s/it][A
 17%|█▋        | 7/42 [00:20<01:40,  2.86s/it][A
 19%|█▉        | 8/42 [00:22<01:38,  2.89s/it][A
 21%|██▏       | 9/42 [00:25<01:35,  2.89s/it][A
 24%|██▍       | 10/42 [00:28<01:31,  2.87s/it][A
 26%|██▌       | 11/42 [00:31<01:28,  2.87s/it][A
 29%|██▊       | 12/42 [00:34<01:26,  2.89s/it][A
 31%|███       | 13/42 [00:37<01:22,  2.83s/it][A
 33%|███▎      | 14/42 [00:40<01:19,  2.84s/it][A
 36%|███▌      | 15/42 [00:42<01:16,  2.82s/it][A
 38%|███▊      | 16/42 [00:45<01:14,  2.85s/it][A
 40%|████      | 17/42 [00:48<01:11,  2.85s/it][A
 43%|████▎     | 18/42 [00:51<01:07,  2.82s/it][A
 45%|████▌     | 19/42 [00:54<01:05,  2.86s/it][A
 48%|████▊     | 20/42 [00:57<01:05,  2


	Validation loss: 0.5922344475984573

	Validation acc: 0.6339285714285714

	Validation prec: 0.8638360922320762

	Validation rec: 0.6339285714285714

	Validation f1: 0.700542132413139
loss: 


  1%|          | 1/83 [00:21<29:27, 21.56s/it][A

tensor(0.5551, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:42<28:46, 21.32s/it][A

loss: tensor(0.5438, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:03<28:08, 21.11s/it][A

loss: 


  5%|▍         | 4/83 [01:24<27:37, 20.98s/it][A

tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:45<27:12, 20.93s/it][A

tensor(0.5756, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 6/83 [02:05<26:46, 20.87s/it][A

loss: tensor(0.5711, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:26<26:23, 20.84s/it][A

tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:47<26:00, 20.81s/it][A

loss: tensor(0.4820, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:08<25:40, 20.81s/it][A

tensor(0.5989, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:29<25:18, 20.81s/it][A

tensor(0.6436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:49<24:59, 20.83s/it][A

tensor(0.5981, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:10<24:36, 20.80s/it][A

tensor(0.5149, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:31<24:12, 20.75s/it][A

tensor(0.6257, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:51<23:49, 20.72s/it][A

tensor(0.6439, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:12<23:27, 20.70s/it][A

tensor(0.6261, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:33<23:06, 20.69s/it][A

tensor(0.6568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:54<22:47, 20.72s/it][A

tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:14<22:27, 20.74s/it][A

loss: tensor(0.6307, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:35<22:08, 20.76s/it][A

tensor(0.5528, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:56<21:48, 20.77s/it][A

loss: tensor(0.6146, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▌       | 21/83 [07:17<21:28, 20.78s/it][A

loss: tensor(0.5072, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:37<21:06, 20.76s/it][A

loss: tensor(0.5726, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:58<20:42, 20.71s/it][A

loss: tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:19<20:19, 20.68s/it][A

tensor(0.4483, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:39<20:00, 20.69s/it][A

loss: tensor(0.6439, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [09:00<19:39, 20.69s/it][A

loss: tensor(0.5358, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:21<19:17, 20.67s/it][A

tensor(0.5895, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:41<18:54, 20.63s/it][A

tensor(0.7400, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:02<18:32, 20.60s/it][A

tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:22<18:12, 20.62s/it][A

tensor(0.4002, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:43<17:52, 20.63s/it][A

tensor(0.5245, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:04<17:31, 20.62s/it][A

tensor(0.5253, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:24<17:10, 20.60s/it][A

tensor(0.4646, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:45<16:48, 20.59s/it][A

tensor(0.6064, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:06<16:29, 20.62s/it][A

tensor(0.6121, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:26<16:10, 20.65s/it][A

tensor(0.6278, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:47<15:52, 20.70s/it][A

tensor(0.5568, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:08<15:30, 20.68s/it][A

tensor(0.6261, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:28<15:09, 20.66s/it][A

tensor(0.5516, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:49<14:50, 20.70s/it][A

tensor(0.6930, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:10<14:28, 20.69s/it][A

tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 42/83 [14:30<14:08, 20.69s/it][A

loss: tensor(0.6404, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:51<13:49, 20.73s/it][A

tensor(0.6486, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 44/83 [15:12<13:28, 20.73s/it][A

loss: tensor(0.5495, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:33<13:06, 20.69s/it][A

tensor(0.7218, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:53<12:46, 20.71s/it][A

tensor(0.5609, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:14<12:24, 20.69s/it][A

loss: tensor(0.5809, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:35<12:03, 20.67s/it]

loss: tensor(0.5700, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 59%|█████▉    | 49/83 [16:55<11:41, 20.64s/it][A

tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:16<11:20, 20.63s/it][A

tensor(0.5652, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:36<10:59, 20.62s/it][A

loss: tensor(0.4666, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [17:57<10:38, 20.59s/it][A

loss: tensor(0.5037, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:17<10:16, 20.55s/it][A

loss: tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:38<09:55, 20.55s/it][A

loss: tensor(0.6415, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [18:58<09:35, 20.54s/it][A

loss: tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:19<09:17, 20.64s/it][A

tensor(0.5200, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:40<08:56, 20.63s/it][A

tensor(0.6325, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:01<08:35, 20.64s/it][A

tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:21<08:14, 20.62s/it][A

tensor(0.7649, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:42<07:54, 20.65s/it][A

tensor(0.5544, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:03<07:34, 20.65s/it][A

tensor(0.7337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:23<07:13, 20.65s/it][A

tensor(0.5623, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:44<06:53, 20.67s/it][A

loss: tensor(0.7348, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:05<06:33, 20.70s/it][A

tensor(0.6072, device='cuda:0', grad_fn=<NllLossBackward>)



 78%|███████▊  | 65/83 [22:25<06:12, 20.72s/it][A

loss: tensor(0.7571, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:46<05:52, 20.74s/it][A

tensor(0.7075, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:07<05:32, 20.77s/it][A

tensor(0.6072, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:28<05:11, 20.78s/it][A

tensor(0.5272, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:49<04:50, 20.77s/it][A

tensor(0.6990, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:09<04:29, 20.72s/it][A

tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:30<04:08, 20.70s/it][A

tensor(0.5263, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:51<03:47, 20.72s/it][A

tensor(0.7793, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:11<03:27, 20.71s/it][A

loss: tensor(0.5602, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:32<03:06, 20.68s/it][A

tensor(0.5525, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:53<02:45, 20.73s/it][A

tensor(0.5819, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:14<02:25, 20.78s/it][A

tensor(0.5659, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [26:35<02:04, 20.81s/it][A

loss: tensor(0.6802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:55<01:44, 20.84s/it][A

tensor(0.4902, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:16<01:23, 20.89s/it][A

loss: tensor(0.5559, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:37<01:02, 20.85s/it][A

tensor(0.5630, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:58<00:41, 20.79s/it][A

tensor(0.5839, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:19<00:20, 20.76s/it][A

tensor(0.5617, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:26<00:00, 20.56s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.5802, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5988354550068637

	train acc: 0.6627703997809419

	training prec: 0.854222356378725

	training rec: 0.6627703997809419

	training f1: 0.7216266713346601

	Current Learning rate:  0.00011428571428571428



  2%|▏         | 1/42 [00:02<01:47,  2.61s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.65s/it][A
 10%|▉         | 4/42 [00:10<01:40,  2.64s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.65s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:28,  2.67s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.65s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.66s/it][A
 29%|██▊       | 12/42 [00:31<01:20,  2.67s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.65s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.65s/it][A
 36%|███▌      | 15/42 [00:39<01:10,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.62s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.63s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.61s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.5883181911139261

	Validation acc: 0.6805555555555556

	Validation prec: 0.8563432847075076

	Validation rec: 0.6805555555555556

	Validation f1: 0.7351892041733336
loss: 


  1%|          | 1/83 [00:20<28:38, 20.96s/it][A

tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)



  2%|▏         | 2/83 [00:41<28:06, 20.82s/it][A

loss: tensor(0.5014, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:02<27:40, 20.76s/it][A

tensor(0.6516, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:23<27:19, 20.76s/it][A

tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7056, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:43<26:59, 20.76s/it][A

loss: 


  7%|▋         | 6/83 [02:04<26:37, 20.74s/it][A

tensor(0.6172, device='cuda:0', grad_fn=<NllLossBackward>)



  8%|▊         | 7/83 [02:25<26:17, 20.75s/it][A

loss: tensor(0.5447, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:46<25:56, 20.75s/it][A

tensor(0.6269, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:33, 20.73s/it][A

tensor(0.5704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 12%|█▏        | 10/83 [03:27<25:10, 20.69s/it][A

tensor(0.5662, device='cuda:0', grad_fn=<NllLossBackward>)



 13%|█▎        | 11/83 [03:48<24:48, 20.67s/it][A

loss: tensor(0.5531, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:08<24:27, 20.66s/it][A

loss: tensor(0.5999, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:06, 20.66s/it][A

tensor(0.6955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:49<23:45, 20.65s/it][A

tensor(0.6823, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5233, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 15/83 [05:10<23:25, 20.66s/it][A

loss: 


 19%|█▉        | 16/83 [05:31<23:04, 20.66s/it][A

tensor(0.5776, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5244, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:51<22:44, 20.67s/it][A

loss: 


 22%|██▏       | 18/83 [06:12<22:23, 20.67s/it][A

tensor(0.6126, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:33<22:03, 20.67s/it][A

loss: tensor(0.5279, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:54<21:45, 20.73s/it][A

loss: tensor(0.5138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:15<21:27, 20.76s/it][A

tensor(0.4952, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:35<21:05, 20.74s/it][A

loss: tensor(0.7111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:56<20:43, 20.73s/it][A

tensor(0.6949, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:17<20:23, 20.74s/it][A

tensor(0.4519, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:37<20:00, 20.69s/it][A

loss: tensor(0.6801, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [08:58<19:38, 20.67s/it][A

loss: tensor(0.5584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:19<19:18, 20.69s/it][A

tensor(0.5808, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:39<18:58, 20.70s/it][A

tensor(0.6185, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [10:00<18:39, 20.73s/it][A

loss: tensor(0.5741, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:21<18:19, 20.75s/it][A

loss: tensor(0.5043, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:42<17:59, 20.75s/it][A

tensor(0.6326, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:02<17:35, 20.69s/it][A

loss: tensor(0.5589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:23<17:13, 20.67s/it][A

tensor(0.5996, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:44<16:52, 20.67s/it][A

tensor(0.5899, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:04<16:30, 20.63s/it][A

tensor(0.7138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6596, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:25<16:08, 20.62s/it][A

loss: 


 45%|████▍     | 37/83 [12:45<15:50, 20.67s/it][A

tensor(0.6831, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:06<15:30, 20.68s/it][A

tensor(0.5154, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:27<15:10, 20.69s/it][A

loss: tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:48<14:51, 20.72s/it][A

tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:08<14:29, 20.70s/it][A

tensor(0.6340, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:29<14:08, 20.69s/it][A

tensor(0.4475, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:50<13:47, 20.68s/it][A

tensor(0.5935, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:10<13:26, 20.67s/it][A

tensor(0.6369, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:31<13:05, 20.67s/it][A

tensor(0.5359, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:52<12:48, 20.77s/it][A

tensor(0.6401, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:13<12:28, 20.80s/it][A

tensor(0.5678, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:34<12:08, 20.83s/it][A

tensor(0.6356, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:54<11:46, 20.79s/it][A

tensor(0.5626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.6424, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:15<11:25, 20.76s/it][A
 61%|██████▏   | 51/83 [17:36<11:03, 20.73s/it][A

loss: tensor(0.5513, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:57<10:43, 20.76s/it][A

tensor(0.5544, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:17<10:22, 20.75s/it][A

loss: tensor(0.5871, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:38<10:02, 20.76s/it][A

tensor(0.6890, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [18:59<09:41, 20.76s/it][A

tensor(0.5826, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:20<09:21, 20.80s/it][A

tensor(0.8815, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:40<08:59, 20.77s/it][A

tensor(0.6062, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:01<08:40, 20.81s/it][A

tensor(0.6310, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:22<08:19, 20.83s/it][A

loss: tensor(0.5687, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:43<07:59, 20.83s/it][A

tensor(0.6043, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:04<07:37, 20.79s/it][A

tensor(0.6823, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:24<07:15, 20.72s/it][A

tensor(0.5739, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:45<06:52, 20.65s/it][A

tensor(0.6101, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 64/83 [22:05<06:31, 20.62s/it][A

loss: tensor(0.6081, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:26<06:11, 20.62s/it][A

tensor(0.5258, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [22:47<05:50, 20.64s/it][A

loss: tensor(0.5025, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:07<05:30, 20.64s/it][A

tensor(0.6313, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:28<05:09, 20.61s/it][A

tensor(0.6050, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [23:48<04:48, 20.61s/it][A

loss: tensor(0.5594, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:09<04:28, 20.62s/it][A

loss: tensor(0.5796, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:30<04:07, 20.61s/it][A

tensor(0.6972, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:50<03:46, 20.62s/it][A

tensor(0.5589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:11<03:26, 20.63s/it][A

tensor(0.5644, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 89%|████████▉ | 74/83 [25:32<03:05, 20.63s/it][A

tensor(0.6677, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 90%|█████████ | 75/83 [25:52<02:44, 20.60s/it][A

tensor(0.5674, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:13<02:24, 20.59s/it][A

tensor(0.6241, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [26:33<02:03, 20.55s/it][A

loss: tensor(0.5150, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [26:54<01:42, 20.57s/it][A

loss: tensor(0.5556, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:14<01:22, 20.53s/it][A

loss: tensor(0.5737, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:35<01:01, 20.55s/it][A

tensor(0.6098, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:56<00:41, 20.65s/it][A

tensor(0.5502, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:16<00:20, 20.65s/it][A

tensor(0.5098, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:23<00:00, 20.53s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.6118, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.596425178180258

	train acc: 0.6571399233296823

	training prec: 0.8530605855466701

	training rec: 0.6571399233296823

	training f1: 0.7158579268702416

	Current Learning rate:  8.571428571428571e-05



  2%|▏         | 1/42 [00:02<01:50,  2.69s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.67s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.66s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.67s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.65s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.66s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.67s/it][A
 26%|██▌       | 11/42 [00:29<01:22,  2.65s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.65s/it][A
 31%|███       | 13/42 [00:34<01:18,  2.70s/it][A
 33%|███▎      | 14/42 [00:37<01:14,  2.67s/it][A
 36%|███▌      | 15/42 [00:39<01:12,  2.67s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.67s/it][A
 40%|████      | 17/42 [00:45<01:06,  2.66s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.66s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.5949143845410574

	Validation acc: 0.6966765873015873

	Validation prec: 0.8437099052610493

	Validation rec: 0.6966765873015873

	Validation f1: 0.7437251751145838
loss: 


  1%|          | 1/83 [00:20<28:21, 20.75s/it][A

tensor(0.5501, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:55, 20.68s/it][A

tensor(0.5497, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:01<27:29, 20.62s/it][A

loss: tensor(0.6878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:09, 20.62s/it][A

tensor(0.4789, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:43<26:49, 20.63s/it][A

tensor(0.4855, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  7%|▋         | 6/83 [02:04<26:34, 20.71s/it][A

tensor(0.7775, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:16, 20.74s/it][A

tensor(0.6904, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:45<25:56, 20.75s/it][A

loss: tensor(0.5337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 11%|█         | 9/83 [03:06<25:38, 20.79s/it][A

tensor(0.6243, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5917, device='cuda:0', grad_fn=<NllLossBackward>)


 12%|█▏        | 10/83 [03:27<25:19, 20.81s/it][A

loss: 


 13%|█▎        | 11/83 [03:48<24:57, 20.79s/it][A

tensor(0.8619, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 12/83 [04:08<24:34, 20.77s/it][A

loss: tensor(0.5838, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:29<24:11, 20.73s/it][A

tensor(0.6310, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:50<23:50, 20.73s/it][A

tensor(0.5270, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5612, device='cuda:0', grad_fn=<NllLossBackward>)


 18%|█▊        | 15/83 [05:10<23:28, 20.71s/it][A

loss: 


 19%|█▉        | 16/83 [05:31<23:08, 20.73s/it][A

tensor(0.5711, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:52<22:47, 20.72s/it][A

loss: tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:13<22:27, 20.74s/it][A

tensor(0.4947, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 19/83 [06:33<22:07, 20.74s/it][A

loss: tensor(0.6008, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:54<21:41, 20.66s/it][A

loss: tensor(0.5153, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:14<21:19, 20.64s/it][A

tensor(0.6227, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:35<20:59, 20.65s/it][A

tensor(0.5798, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [07:56<20:39, 20.67s/it][A

loss: tensor(0.6685, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:17<20:19, 20.67s/it][A

loss: tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:37<19:59, 20.69s/it][A

loss: tensor(0.6458, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [08:58<19:37, 20.66s/it][A

loss: tensor(0.5586, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:18<19:15, 20.64s/it][A

tensor(0.5224, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:39<18:55, 20.64s/it][A

tensor(0.5457, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 35%|███▍      | 29/83 [10:00<18:34, 20.63s/it][A

tensor(0.7434, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:20<18:12, 20.61s/it][A

loss: tensor(0.4340, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 31/83 [10:41<17:53, 20.64s/it][A

loss: tensor(0.5237, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [11:02<17:33, 20.65s/it][A

loss: tensor(0.5111, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:22<17:13, 20.67s/it][A

tensor(0.5249, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:43<16:54, 20.70s/it][A

loss: tensor(0.7017, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:04<16:32, 20.69s/it][A

loss: tensor(0.5813, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:24<16:11, 20.67s/it][A

tensor(0.5702, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:45<15:52, 20.70s/it][A

tensor(0.6881, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:06<15:31, 20.69s/it][A

tensor(0.4397, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 47%|████▋     | 39/83 [13:27<15:15, 20.80s/it][A

tensor(0.5068, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:48<14:56, 20.84s/it][A

tensor(0.5823, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:09<14:35, 20.85s/it][A

tensor(0.6099, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:30<14:24, 21.09s/it][A

tensor(0.8755, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 43/83 [14:51<13:59, 20.98s/it][A

loss: tensor(0.5878, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:12<13:35, 20.90s/it][A

tensor(0.5338, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6085, device='cuda:0', grad_fn=<NllLossBackward>)


 54%|█████▍    | 45/83 [15:32<13:10, 20.81s/it][A
 55%|█████▌    | 46/83 [15:53<12:48, 20.76s/it][A

loss: tensor(0.5633, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 57%|█████▋    | 47/83 [16:14<12:26, 20.75s/it][A

tensor(0.6802, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:35<12:06, 20.75s/it][A

tensor(0.5930, device='cuda:0', grad_fn=<NllLossBackward>)



 59%|█████▉    | 49/83 [16:55<11:44, 20.73s/it][A

loss: tensor(0.5262, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:16<11:23, 20.71s/it]

loss: tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 61%|██████▏   | 51/83 [17:37<11:01, 20.69s/it][A

tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 63%|██████▎   | 52/83 [17:57<10:41, 20.71s/it][A

tensor(0.6283, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 53/83 [18:18<10:22, 20.74s/it][A

loss: tensor(0.6590, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▌   | 54/83 [18:39<10:00, 20.72s/it][A

loss: tensor(0.6495, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:00<09:40, 20.73s/it][A

tensor(0.5327, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:20<09:19, 20.73s/it][A

loss: tensor(0.5636, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:41<08:59, 20.75s/it][A

tensor(0.5612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 70%|██████▉   | 58/83 [20:02<08:39, 20.79s/it][A

tensor(0.5337, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:23<08:17, 20.73s/it][A

tensor(0.7150, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:43<07:55, 20.67s/it][A

tensor(0.5193, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [21:04<07:33, 20.63s/it][A

loss: tensor(0.6923, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:24<07:12, 20.61s/it][A

tensor(0.5018, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 63/83 [21:45<06:52, 20.60s/it]

loss: tensor(0.5426, device='cuda:0', grad_fn=<NllLossBackward>)


[A

loss: 


 77%|███████▋  | 64/83 [22:05<06:31, 20.59s/it][A

tensor(0.5466, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:26<06:10, 20.60s/it][A

tensor(0.6420, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:46<05:49, 20.57s/it][A

tensor(0.5584, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:07<05:29, 20.59s/it][A

tensor(0.5837, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:28<05:08, 20.56s/it][A

tensor(0.6473, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6109, device='cuda:0', grad_fn=<NllLossBackward>)


 83%|████████▎ | 69/83 [23:48<04:47, 20.55s/it][A

loss: 


 84%|████████▍ | 70/83 [24:09<04:27, 20.56s/it][A

tensor(0.6096, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:29<04:06, 20.58s/it][A

tensor(0.5515, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:50<03:46, 20.60s/it][A

tensor(0.5502, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 73/83 [25:11<03:25, 20.60s/it][A

loss: tensor(0.6552, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5079, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 74/83 [25:31<03:05, 20.62s/it][A
 90%|█████████ | 75/83 [25:52<02:44, 20.62s/it][A

loss: tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 92%|█████████▏| 76/83 [26:13<02:24, 20.66s/it][A

tensor(0.7129, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:33<02:04, 20.68s/it][A

tensor(0.6187, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [26:54<01:43, 20.69s/it][A

loss: tensor(0.5250, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:15<01:22, 20.68s/it][A

loss: tensor(0.6612, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 96%|█████████▋| 80/83 [27:35<01:02, 20.71s/it][A

tensor(0.5407, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:56<00:41, 20.75s/it][A

tensor(0.5574, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:17<00:20, 20.72s/it][A

tensor(0.6100, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:24<00:00, 20.54s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.7697, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.597203834229205

	train acc: 0.6705058871851041

	training prec: 0.8530673094529497

	training rec: 0.6705058871851041

	training f1: 0.726103696386914

	Current Learning rate:  5.714285714285714e-05



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:47,  2.69s/it][A
  7%|▋         | 3/42 [00:08<01:43,  2.66s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:38,  2.67s/it][A
 14%|█▍        | 6/42 [00:16<01:35,  2.66s/it][A
 17%|█▋        | 7/42 [00:18<01:33,  2.67s/it][A
 19%|█▉        | 8/42 [00:21<01:30,  2.68s/it][A
 21%|██▏       | 9/42 [00:24<01:28,  2.67s/it][A
 24%|██▍       | 10/42 [00:26<01:25,  2.68s/it][A
 26%|██▌       | 11/42 [00:29<01:23,  2.68s/it][A
 29%|██▊       | 12/42 [00:32<01:19,  2.66s/it][A
 31%|███       | 13/42 [00:34<01:17,  2.67s/it][A
 33%|███▎      | 14/42 [00:37<01:15,  2.69s/it][A
 36%|███▌      | 15/42 [00:40<01:12,  2.68s/it][A
 38%|███▊      | 16/42 [00:42<01:09,  2.69s/it][A
 40%|████      | 17/42 [00:45<01:07,  2.69s/it][A
 43%|████▎     | 18/42 [00:48<01:04,  2.67s/it][A
 45%|████▌     | 19/42 [00:50<01:01,  2.67s/it][A
 48%|████▊     | 20/42 [00:53<00:58,  2


	Validation loss: 0.5934344203699202

	Validation acc: 0.6517857142857143

	Validation prec: 0.8570227984579868

	Validation rec: 0.6517857142857143

	Validation f1: 0.7141190862710134
loss: 


  1%|          | 1/83 [00:20<28:19, 20.73s/it][A

tensor(0.5203, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:42<28:40, 21.24s/it][A

tensor(0.6014, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 3/83 [01:03<28:29, 21.37s/it][A

loss: tensor(0.6041, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:24<27:51, 21.16s/it][A

tensor(0.5334, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  6%|▌         | 5/83 [01:46<27:54, 21.47s/it][A

tensor(0.6453, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 6/83 [02:08<27:40, 21.56s/it][A

loss: tensor(0.6705, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:30<27:24, 21.64s/it][A

tensor(0.5632, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 8/83 [02:51<26:55, 21.54s/it][A

loss: tensor(0.5518, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:12<26:27, 21.45s/it][A

loss: tensor(0.5436, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 10/83 [03:33<25:48, 21.21s/it][A

loss: tensor(0.5685, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 13%|█▎        | 11/83 [03:54<25:13, 21.02s/it][A

tensor(0.6843, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:14<24:40, 20.85s/it][A

tensor(0.6160, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:35<24:14, 20.78s/it][A

tensor(0.5149, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 17%|█▋        | 14/83 [04:55<23:51, 20.75s/it][A

tensor(0.5592, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 18%|█▊        | 15/83 [05:16<23:25, 20.67s/it][A

tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 16/83 [05:36<23:01, 20.62s/it][A

loss: tensor(0.5658, device='cuda:0', grad_fn=<NllLossBackward>)



 20%|██        | 17/83 [05:57<22:37, 20.57s/it][A

loss: tensor(0.4692, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.8421, device='cuda:0', grad_fn=<NllLossBackward>)



 22%|██▏       | 18/83 [06:17<22:18, 20.59s/it][A

loss: 


 23%|██▎       | 19/83 [06:38<22:02, 20.66s/it][A

tensor(0.4812, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 20/83 [06:59<21:43, 20.69s/it][A

loss: tensor(0.6164, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:20<21:24, 20.72s/it][A

tensor(0.4841, device='cuda:0', grad_fn=<NllLossBackward>)



 27%|██▋       | 22/83 [07:40<21:03, 20.71s/it][A

loss: tensor(0.5970, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 23/83 [08:01<20:41, 20.69s/it][A

loss: tensor(0.7461, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 29%|██▉       | 24/83 [08:22<20:21, 20.71s/it][A

tensor(0.8024, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|███       | 25/83 [08:43<20:06, 20.80s/it][A

loss: tensor(0.6104, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [09:04<19:44, 20.79s/it][A

loss: tensor(0.5328, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 27/83 [09:24<19:21, 20.74s/it][A

loss: tensor(0.5201, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 34%|███▎      | 28/83 [09:45<18:59, 20.71s/it][A

tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [10:06<18:37, 20.70s/it][A

loss: tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 30/83 [10:26<18:17, 20.70s/it][A

loss: tensor(0.5955, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:47<17:56, 20.69s/it][A

tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 39%|███▊      | 32/83 [11:08<17:35, 20.69s/it][A

tensor(0.6320, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:29<17:16, 20.74s/it][A

tensor(0.4817, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 41%|████      | 34/83 [11:49<16:58, 20.78s/it][A

tensor(0.5182, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 35/83 [12:10<16:36, 20.76s/it][A

loss: tensor(0.6175, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 36/83 [12:31<16:17, 20.80s/it][A

loss: tensor(0.6729, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 45%|████▍     | 37/83 [12:52<15:53, 20.74s/it][A

tensor(0.6223, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:12<15:30, 20.69s/it][A

tensor(0.5804, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:33<15:09, 20.66s/it][A

loss: tensor(0.5704, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 48%|████▊     | 40/83 [13:53<14:47, 20.63s/it][A

tensor(0.6616, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:14<14:27, 20.64s/it][A

tensor(0.7012, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 51%|█████     | 42/83 [14:35<14:07, 20.67s/it][A

tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 52%|█████▏    | 43/83 [14:55<13:45, 20.64s/it][A

tensor(0.5216, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:16<13:23, 20.61s/it][A

tensor(0.5470, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 45/83 [15:36<13:03, 20.63s/it][A

loss: tensor(0.5283, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 55%|█████▌    | 46/83 [15:57<12:44, 20.67s/it][A

tensor(0.5238, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:18<12:22, 20.64s/it][A

loss: tensor(0.6502, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 58%|█████▊    | 48/83 [16:38<12:01, 20.61s/it][A

tensor(0.6175, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:59<11:40, 20.61s/it][A

tensor(0.5316, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|██████    | 50/83 [17:20<11:22, 20.68s/it][A

loss: tensor(0.6965, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:40<11:01, 20.66s/it][A

loss: tensor(0.5626, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [18:01<10:39, 20.62s/it][A

loss: tensor(0.6419, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5278, device='cuda:0', grad_fn=<NllLossBackward>)


 64%|██████▍   | 53/83 [18:22<10:18, 20.61s/it][A

loss: 


 65%|██████▌   | 54/83 [18:42<09:58, 20.62s/it][A

tensor(0.5761, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 66%|██████▋   | 55/83 [19:03<09:37, 20.61s/it][A

tensor(0.5859, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 56/83 [19:23<09:16, 20.60s/it][A

loss: tensor(0.5749, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 69%|██████▊   | 57/83 [19:44<08:55, 20.59s/it][A

tensor(0.6345, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [20:04<08:34, 20.58s/it][A

loss: tensor(0.5865, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 71%|███████   | 59/83 [20:25<08:15, 20.63s/it][A

tensor(0.5180, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 72%|███████▏  | 60/83 [20:46<07:54, 20.64s/it][A

tensor(0.6005, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 73%|███████▎  | 61/83 [21:07<07:34, 20.65s/it][A

tensor(0.6138, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 75%|███████▍  | 62/83 [21:27<07:13, 20.67s/it][A

tensor(0.5629, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:48<06:54, 20.71s/it][A

tensor(0.6708, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [22:09<06:32, 20.65s/it][A

tensor(0.5539, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:29<06:11, 20.63s/it][A

tensor(0.5763, device='cuda:0', grad_fn=<NllLossBackward>)



 80%|███████▉  | 66/83 [22:50<05:50, 20.61s/it][A

loss: tensor(0.4914, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 81%|████████  | 67/83 [23:10<05:29, 20.61s/it][A

tensor(0.6426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:31<05:09, 20.62s/it][A

tensor(0.5650, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 69/83 [23:52<04:48, 20.59s/it][A

loss: tensor(0.5527, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5354, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 70/83 [24:12<04:27, 20.61s/it][A

loss: 


 86%|████████▌ | 71/83 [24:33<04:07, 20.64s/it][A

tensor(0.4667, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:54<03:47, 20.66s/it][A

tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 88%|████████▊ | 73/83 [25:14<03:26, 20.68s/it][A

tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)


 89%|████████▉ | 74/83 [25:35<03:05, 20.63s/it][A

loss: 


 90%|█████████ | 75/83 [25:55<02:44, 20.62s/it][A

tensor(0.5801, device='cuda:0', grad_fn=<NllLossBackward>)



 92%|█████████▏| 76/83 [26:16<02:24, 20.58s/it][A

loss: tensor(0.6148, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 77/83 [26:36<02:03, 20.57s/it][A

loss: tensor(0.5321, device='cuda:0', grad_fn=<NllLossBackward>)



 94%|█████████▍| 78/83 [26:57<01:42, 20.56s/it][A

loss: tensor(0.6361, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:18<01:22, 20.61s/it][A

loss: tensor(0.5927, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 80/83 [27:38<01:01, 20.60s/it][A

loss: tensor(0.6087, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 81/83 [27:59<00:41, 20.60s/it][A

loss: tensor(0.6143, device='cuda:0', grad_fn=<NllLossBackward>)



 99%|█████████▉| 82/83 [28:19<00:20, 20.59s/it][A

loss: tensor(0.4913, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:27<00:00, 20.57s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.8480, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5951999494110245

	train acc: 0.6669290799561883

	training prec: 0.8537556214254829

	training rec: 0.6669290799561883

	training f1: 0.7236845437002485

	Current Learning rate:  2.857142857142857e-05



  2%|▏         | 1/42 [00:02<01:47,  2.61s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.64s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.64s/it][A
 10%|▉         | 4/42 [00:10<01:39,  2.63s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.64s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.64s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.64s/it][A
 21%|██▏       | 9/42 [00:23<01:27,  2.64s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.63s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.64s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.64s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.65s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.63s/it][A
 40%|████      | 17/42 [00:44<01:06,  2.64s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.64s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.62s/it][A
 48%|████▊     | 20/42 [00:52<00:57,  2


	Validation loss: 0.5845594221637362

	Validation acc: 0.6560019841269841

	Validation prec: 0.8627782969293052

	Validation rec: 0.6560019841269841

	Validation f1: 0.7175295636174112
loss: 


  1%|          | 1/83 [00:20<28:13, 20.65s/it][A

tensor(0.5506, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  2%|▏         | 2/83 [00:41<27:53, 20.66s/it][A

tensor(0.5436, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  4%|▎         | 3/83 [01:01<27:30, 20.63s/it][A

tensor(0.6716, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  5%|▍         | 4/83 [01:22<27:06, 20.58s/it][A

tensor(0.5183, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.7431, device='cuda:0', grad_fn=<NllLossBackward>)



  6%|▌         | 5/83 [01:43<26:48, 20.62s/it][A

loss: 


  7%|▋         | 6/83 [02:03<26:32, 20.68s/it][A

tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


  8%|▊         | 7/83 [02:24<26:12, 20.69s/it][A

tensor(0.7818, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 10%|▉         | 8/83 [02:45<25:50, 20.67s/it][A

tensor(0.5686, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 9/83 [03:05<25:30, 20.69s/it][A

loss: tensor(0.5806, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5929, device='cuda:0', grad_fn=<NllLossBackward>)


 12%|█▏        | 10/83 [03:26<25:10, 20.69s/it][A

loss: 


 13%|█▎        | 11/83 [03:47<24:51, 20.71s/it][A

tensor(0.5961, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 14%|█▍        | 12/83 [04:08<24:32, 20.74s/it][A

tensor(0.5927, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 16%|█▌        | 13/83 [04:28<24:10, 20.73s/it][A

tensor(0.6260, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.6088, device='cuda:0', grad_fn=<NllLossBackward>)


 17%|█▋        | 14/83 [04:49<23:47, 20.69s/it][A

loss: 


 18%|█▊        | 15/83 [05:10<23:28, 20.72s/it][A

tensor(0.5787, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 19%|█▉        | 16/83 [05:30<23:06, 20.69s/it][A

tensor(0.5917, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 20%|██        | 17/83 [05:51<22:44, 20.68s/it][A

tensor(0.7074, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 22%|██▏       | 18/83 [06:12<22:20, 20.63s/it][A

tensor(0.5265, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 23%|██▎       | 19/83 [06:32<21:58, 20.60s/it][A

tensor(0.5842, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 24%|██▍       | 20/83 [06:53<21:37, 20.59s/it][A

tensor(0.5941, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 25%|██▌       | 21/83 [07:13<21:17, 20.60s/it][A

tensor(0.5556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 27%|██▋       | 22/83 [07:34<20:55, 20.57s/it][A

tensor(0.7589, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 28%|██▊       | 23/83 [07:54<20:34, 20.58s/it][A

tensor(0.5651, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▉       | 24/83 [08:15<20:13, 20.57s/it][A

loss: tensor(0.7288, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 30%|███       | 25/83 [08:36<19:53, 20.58s/it][A

tensor(0.6943, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███▏      | 26/83 [08:56<19:32, 20.57s/it][A

loss: tensor(0.5145, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 33%|███▎      | 27/83 [09:17<19:12, 20.58s/it][A

tensor(0.5167, device='cuda:0', grad_fn=<NllLossBackward>)



 34%|███▎      | 28/83 [09:37<18:48, 20.52s/it][A

loss: tensor(0.6189, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▍      | 29/83 [09:58<18:27, 20.50s/it][A

loss: tensor(0.5639, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 36%|███▌      | 30/83 [10:18<18:06, 20.50s/it][A

tensor(0.5151, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 37%|███▋      | 31/83 [10:39<17:46, 20.51s/it][A

tensor(0.5789, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 32/83 [10:59<17:24, 20.49s/it][A

loss: tensor(0.6537, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 40%|███▉      | 33/83 [11:20<17:04, 20.49s/it][A

tensor(0.5347, device='cuda:0', grad_fn=<NllLossBackward>)



 41%|████      | 34/83 [11:40<16:44, 20.50s/it][A

loss: tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 42%|████▏     | 35/83 [12:01<16:25, 20.52s/it][A

tensor(0.5663, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 43%|████▎     | 36/83 [12:21<16:05, 20.54s/it][A

tensor(0.4761, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▍     | 37/83 [12:42<15:45, 20.55s/it][A

loss: tensor(0.5976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 46%|████▌     | 38/83 [13:02<15:25, 20.56s/it][A

tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 39/83 [13:23<15:04, 20.55s/it][A

loss: tensor(0.6032, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 40/83 [13:44<14:45, 20.58s/it][A

loss: tensor(0.5752, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 49%|████▉     | 41/83 [14:04<14:23, 20.57s/it][A

tensor(0.6279, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.5021, device='cuda:0', grad_fn=<NllLossBackward>)


 51%|█████     | 42/83 [14:25<14:03, 20.57s/it][A

loss: 


 52%|█████▏    | 43/83 [14:45<13:43, 20.58s/it][A

tensor(0.6744, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 53%|█████▎    | 44/83 [15:06<13:21, 20.56s/it][A

tensor(0.6556, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 54%|█████▍    | 45/83 [15:26<13:01, 20.56s/it][A

tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▌    | 46/83 [15:47<12:41, 20.57s/it][A

loss: tensor(0.6130, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 47/83 [16:07<12:19, 20.54s/it][A

loss: tensor(0.5837, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 48/83 [16:28<11:58, 20.52s/it][A

loss: tensor(0.6273, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 59%|█████▉    | 49/83 [16:48<11:38, 20.54s/it][A

tensor(0.5373, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 60%|██████    | 50/83 [17:09<11:17, 20.53s/it][A

tensor(0.5337, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 51/83 [17:29<10:56, 20.52s/it][A

loss: tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 52/83 [17:50<10:37, 20.55s/it][A

loss: tensor(0.5375, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 64%|██████▍   | 53/83 [18:11<10:18, 20.61s/it][A

tensor(0.4976, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 65%|██████▌   | 54/83 [18:31<09:57, 20.60s/it][A

tensor(0.5918, device='cuda:0', grad_fn=<NllLossBackward>)



 66%|██████▋   | 55/83 [18:52<09:37, 20.62s/it][A

loss: tensor(0.6402, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 67%|██████▋   | 56/83 [19:13<09:16, 20.62s/it][A

tensor(0.5743, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▊   | 57/83 [19:33<08:55, 20.59s/it][A

loss: tensor(0.5686, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|██████▉   | 58/83 [19:54<08:35, 20.63s/it][A

loss: tensor(0.5695, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████   | 59/83 [20:15<08:14, 20.61s/it][A

loss: tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 60/83 [20:35<07:53, 20.60s/it][A

loss: tensor(0.5995, device='cuda:0', grad_fn=<NllLossBackward>)



 73%|███████▎  | 61/83 [20:56<07:34, 20.65s/it][A

loss: tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▍  | 62/83 [21:17<07:13, 20.66s/it][A

loss: tensor(0.6231, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 76%|███████▌  | 63/83 [21:37<06:53, 20.68s/it][A

tensor(0.5011, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 77%|███████▋  | 64/83 [21:58<06:34, 20.77s/it][A

tensor(0.6142, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 78%|███████▊  | 65/83 [22:19<06:11, 20.66s/it][A

tensor(0.6603, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 80%|███████▉  | 66/83 [22:39<05:49, 20.58s/it][A

tensor(0.5386, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 67/83 [22:59<05:28, 20.51s/it][A

loss: tensor(0.4593, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 82%|████████▏ | 68/83 [23:20<05:07, 20.51s/it][A

tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 83%|████████▎ | 69/83 [23:40<04:47, 20.52s/it][A

tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 84%|████████▍ | 70/83 [24:01<04:27, 20.54s/it][A

tensor(0.5260, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 86%|████████▌ | 71/83 [24:22<04:06, 20.54s/it][A

tensor(0.7929, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 87%|████████▋ | 72/83 [24:42<03:46, 20.57s/it][A

tensor(0.5739, device='cuda:0', grad_fn=<NllLossBackward>)





loss: tensor(0.7300, device='cuda:0', grad_fn=<NllLossBackward>)


 88%|████████▊ | 73/83 [25:03<03:25, 20.57s/it][A

loss: 


 89%|████████▉ | 74/83 [25:23<03:05, 20.58s/it][A

tensor(0.6332, device='cuda:0', grad_fn=<NllLossBackward>)
loss: tensor(0.5402, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 75/83 [25:44<02:44, 20.56s/it][A

loss: 


 92%|█████████▏| 76/83 [26:04<02:23, 20.56s/it][A

tensor(0.5486, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 93%|█████████▎| 77/83 [26:25<02:03, 20.63s/it][A

tensor(0.6941, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 94%|█████████▍| 78/83 [26:46<01:43, 20.66s/it][A

tensor(0.5421, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 79/83 [27:07<01:22, 20.69s/it]

loss: tensor(0.5753, device='cuda:0', grad_fn=<NllLossBackward>)


[A
 96%|█████████▋| 80/83 [27:28<01:02, 20.73s/it][A

loss: tensor(0.5626, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 98%|█████████▊| 81/83 [27:48<00:41, 20.74s/it][A

tensor(0.6426, device='cuda:0', grad_fn=<NllLossBackward>)
loss: 


 99%|█████████▉| 82/83 [28:09<00:20, 20.73s/it][A

tensor(0.6184, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 83/83 [28:16<00:00, 20.44s/it][A

  0%|          | 0/42 [00:00<?, ?it/s][A

loss: tensor(0.4428, device='cuda:0', grad_fn=<NllLossBackward>)

	Train loss: 0.5971848203475216

	train acc: 0.6552060514786419

	training prec: 0.8552690684630818

	training rec: 0.6552060514786419

	training f1: 0.7152692869077536

	Current Learning rate:  0.0



  2%|▏         | 1/42 [00:02<01:49,  2.68s/it][A
  5%|▍         | 2/42 [00:05<01:45,  2.65s/it][A
  7%|▋         | 3/42 [00:07<01:43,  2.67s/it][A
 10%|▉         | 4/42 [00:10<01:41,  2.66s/it][A
 12%|█▏        | 5/42 [00:13<01:37,  2.64s/it][A
 14%|█▍        | 6/42 [00:15<01:35,  2.65s/it][A
 17%|█▋        | 7/42 [00:18<01:32,  2.63s/it][A
 19%|█▉        | 8/42 [00:21<01:29,  2.63s/it][A
 21%|██▏       | 9/42 [00:23<01:26,  2.63s/it][A
 24%|██▍       | 10/42 [00:26<01:24,  2.64s/it][A
 26%|██▌       | 11/42 [00:29<01:21,  2.62s/it][A
 29%|██▊       | 12/42 [00:31<01:19,  2.63s/it][A
 31%|███       | 13/42 [00:34<01:16,  2.64s/it][A
 33%|███▎      | 14/42 [00:36<01:13,  2.64s/it][A
 36%|███▌      | 15/42 [00:39<01:11,  2.63s/it][A
 38%|███▊      | 16/42 [00:42<01:08,  2.64s/it][A
 40%|████      | 17/42 [00:44<01:05,  2.63s/it][A
 43%|████▎     | 18/42 [00:47<01:03,  2.64s/it][A
 45%|████▌     | 19/42 [00:50<01:00,  2.65s/it][A
 48%|████▊     | 20/42 [00:52<00:58,  2


	Validation loss: 0.5864656908171517

	Validation acc: 0.6577380952380952

	Validation prec: 0.8651683149592261

	Validation rec: 0.6577380952380952

	Validation f1: 0.7179055414240784





In [21]:
# train_loss.numpy()
# train_loss


In [22]:
# # Saving models
# torch.save(model.state_dict(), "finetuned.pth")

# #load models
# model = Neural

In [23]:
## saving the model 
torch.save(model.state_dict(), "finetuned-35-epochs-1e3-lr-with-weighted-loss.pth")

In [21]:
# loading the locally saved model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


model = CausalityBERT()
model.load_state_dict(torch.load("finetuned-35-epochs-1e3-lr-with-weighted-loss.pth"))
## Move the model to the GPU 
model.to(device)
model.eval() # gettign in the eval mode 



You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.6.attention.self.key.bias', 'roberta.encoder.layer.9.intermediate.dense.weight', 'roberta.encoder.layer.8.attention.self.value.bias', 'roberta.encoder.layer.5.attention.self.query.bias', 'roberta.encoder.layer.9.attention.output.dense.bias', 'roberta.encoder.layer.5.attention.self.value.weight', 'roberta.encoder.layer.7.attention.output.LayerNorm.weight', 'roberta.encoder.layer.6.attention.output.LayerNorm.bias', 'roberta.encoder.layer.10.attention.self.value.weight', 'roberta.encoder.layer.9.attention.self.key.bias', 'roberta.encoder.layer.6.intermediate.dense.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'lm_head.dense.bias', 'roberta.encoder.layer.4.intermediate.dense.weight', 'roberta.e

CausalityBERT(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(64001, 768, padding_idx=1)
      (position_embeddings): Embedding(130, 768)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True

## evaluation on the test dataset 



In [22]:
## evaluation on the test dataset 

# test_accuracy, test_mcc_accuracy, nb_test_steps = 0, 0, 0 
# test_mcc_accuracy, nb_test_steps = 0, 0,

# test_accuracy = []
# test_loss = []
# test_acc = []
# test_prec = []
# test_rec = []
# test_f1 = []

# for batch in tqdm(test_loader):
#     batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
#     b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader
#     with torch.no_grad():
#         model.eval()
#         logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
    
    
#     loss = loss_fn(logits, b_labels)
#     test_loss.append(loss.item())

#     # move logits and labels to CPU
#     logits = logits.detach().to('cpu').numpy()
#     label_ids = b_labels.to('cpu').numpy()

#     pred_flat = np.argmax(logits, axis=1).flatten()
#     labels_flat = label_ids.flatten()

# #     eval_accuracy += accuracy_score(labels_flat, pred_flat)
# #     test_mcc_accuracy += matthews_corrcoef(labels_flat, pred_flat)  


#     metrics = compute_metrics(pred_flat, labels_flat)
#     test_acc.append(metrics["accuracy"])
#     test_prec.append(metrics["precision"])
#     test_rec.append(metrics["recall"])
#     test_f1.append(metrics["f1"])
# #     nb_test_steps += 1
    

# # print(F'\n\tValidation Accuracy: {eval_accuracy / nb_eval_steps}')
# # print(F'\n\ttest MCC Accuracy: {test_mcc_accuracy / nb_test_steps}') # eval M
# print(F'\n\ttest loss: {np.mean(test_loss)}')
# print(F'\n\ttest acc: {np.mean(test_acc)}')
# print(F'\n\ttest prec: {np.mean(test_prec)}')
# print(F'\n\ttest rec: {np.mean(test_rec)}')
# print(F'\n\ttest f1: {np.mean(test_f1)}')

In [23]:
loss_fn = CrossEntropyLoss()
test_loss = []
test_acc = []
test_prec = []
test_rec = []
test_f1 = []


for batch in tqdm(test_loader):
    batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
    b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch     # unpack inputs from dataloader

    with torch.no_grad():
        model.eval()
        logits = model(**{"input_ids":b_input_ids, "attention_mask":b_input_mask, "token_type_ids":b_token_type_ids}) # forward pass, calculates logit predictions 
    
    print("logits:")
    print(logits)
    # move logits and labels to CPU
    logits = logits.detach().to('cpu').numpy()
    label_ids = b_labels.to('cpu').numpy()

    pred_flat = np.argmax(logits, axis=1).flatten()
    labels_flat = label_ids.flatten()
    print("pred:", pred_flat)
    print("real:", labels_flat)
    
    metrics = compute_metrics(pred_flat, labels_flat)
    test_acc.append(metrics["accuracy"])
    test_prec.append(metrics["precision"])
    test_rec.append(metrics["recall"])
    test_f1.append(metrics["f1"])

    
print(F'\n\ttest loss: {np.mean(test_loss)}')
print(F'\n\ttest acc: {np.mean(test_acc)}')
print(F'\n\ttest prec: {np.mean(test_prec)}')
print(F'\n\ttest rec: {np.mean(test_rec)}')
print(F'\n\ttest f1: {np.mean(test_f1)}')





  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
  2%|▏         | 1/52 [00:03<03:05,  3.64s/it]

logits:
tensor([[-3.1747e-04, -5.1582e-01],
        [-6.7524e-01,  6.5175e-01],
        [ 3.6325e-01, -1.3585e+00],
        [ 4.2805e-01, -1.4057e+00],
        [-4.0651e-01,  9.8244e-02],
        [ 6.9670e-01, -1.8318e+00],
        [-3.1569e-01, -4.9775e-02],
        [-5.3803e-01,  4.3827e-01],
        [ 1.1968e-01, -8.4351e-01],
        [ 2.2878e-01, -1.0077e+00],
        [-2.8517e-01, -1.5058e-01],
        [-8.7587e-02, -5.1159e-01],
        [-4.6455e-01,  3.2247e-01],
        [ 1.3987e-01, -9.7327e-01],
        [ 2.7935e-03, -6.8200e-01],
        [-3.6148e-01,  3.5646e-02],
        [ 3.3575e-01, -1.4036e+00],
        [-1.2240e-01, -4.6167e-01],
        [-3.8611e-01,  5.7118e-02],
        [ 2.9273e-01, -1.2428e+00],
        [-5.4529e-02, -4.6256e-01],
        [ 5.6206e-01, -1.5911e+00],
        [ 2.6112e-01, -1.1743e+00],
        [ 4.9412e-01, -1.5829e+00],
        [ 7.3927e-01, -2.1908e+00],
        [ 1.1422e-01, -8.4484e-01],
        [-2.8188e-01, -1.2194e-01],
        [ 1.6685e-01

  4%|▍         | 2/52 [00:06<02:49,  3.39s/it]

logits:
tensor([[-0.4290,  0.2924],
        [ 0.1283, -0.8003],
        [-0.4235,  0.1021],
        [-0.0295, -0.6166],
        [-0.2124, -0.2862],
        [-0.1744, -0.2781],
        [ 0.2958, -1.3678],
        [ 0.1016, -0.7903],
        [-0.3986,  0.1642],
        [ 0.0721, -0.7466],
        [-0.6245,  0.5949],
        [ 0.5548, -1.8758],
        [-0.4191,  0.2653],
        [-0.1847, -0.3469],
        [ 0.2976, -1.1465],
        [ 0.3389, -1.2360],
        [ 0.0116, -0.6696],
        [-0.3106, -0.0998],
        [-0.1666, -0.2937],
        [ 0.5689, -1.7598],
        [ 0.3204, -1.3076],
        [-0.3931,  0.0785],
        [ 0.5055, -1.6157],
        [-0.5261,  0.4059],
        [-0.4135,  0.1787],
        [-0.0133, -0.6439],
        [-0.2567, -0.1524],
        [-0.2675, -0.1768],
        [-0.0156, -0.5623],
        [-0.3005, -0.0124],
        [-0.3550,  0.1603],
        [-0.6463,  0.6320]], device='cuda:0')
pred: [1 0 1 0 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 0 1 0 1 1 0 1 1 0 1 1 1]
real: 

  6%|▌         | 3/52 [00:10<02:48,  3.44s/it]

logits:
tensor([[-0.1116, -0.3828],
        [ 0.3767, -1.3826],
        [-0.2385, -0.2143],
        [-0.1955, -0.2761],
        [ 0.0274, -0.6146],
        [ 0.4599, -1.3853],
        [ 0.1097, -0.8759],
        [-0.2171, -0.2358],
        [-0.4392,  0.1941],
        [ 0.1540, -0.8228],
        [ 0.2157, -1.1430],
        [-0.5495,  0.4468],
        [-0.1342, -0.3244],
        [-0.3207,  0.0670],
        [ 0.3350, -1.3194],
        [-0.5654,  0.3639],
        [-0.1856, -0.3745],
        [-0.4040,  0.0953],
        [ 0.2156, -0.9259],
        [-0.3139, -0.0643],
        [ 0.0930, -0.9021],
        [ 0.1529, -0.7578],
        [-0.0636, -0.5371],
        [ 0.4564, -1.4580],
        [ 0.3734, -1.3819],
        [-0.2502, -0.1207],
        [ 0.1042, -0.8880],
        [ 0.2417, -1.1648],
        [-0.0677, -0.4107],
        [-0.5174,  0.4433],
        [-0.5630,  0.3085],
        [-0.4100,  0.0858]], device='cuda:0')
pred: [0 0 1 0 0 0 0 0 1 0 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 1 0 0 0 1 1 1]
real: 

  8%|▊         | 4/52 [00:13<02:45,  3.45s/it]

logits:
tensor([[-0.4594,  0.1942],
        [-0.2490, -0.1600],
        [ 0.5011, -1.6156],
        [-0.4343,  0.1899],
        [-0.4382,  0.1644],
        [ 0.5275, -1.5857],
        [-0.0979, -0.4153],
        [-0.5157,  0.4183],
        [-0.4206,  0.1617],
        [-0.0960, -0.5057],
        [ 0.2510, -1.1424],
        [-0.0025, -0.7083],
        [-0.1815, -0.3159],
        [ 0.2402, -1.2167],
        [-0.5195,  0.3242],
        [ 0.1753, -1.1434],
        [-0.4820,  0.1936],
        [-0.4538,  0.1566],
        [-0.2238, -0.1559],
        [-0.1502, -0.2996],
        [ 0.2534, -1.1844],
        [ 0.2351, -1.0741],
        [-0.4497,  0.3434],
        [ 0.1778, -0.9236],
        [-0.6639,  0.6898],
        [ 0.0900, -0.8275],
        [-0.3117, -0.0271],
        [-0.3297,  0.0058],
        [ 0.1923, -1.0422],
        [ 0.0666, -0.7627],
        [-0.0497, -0.5895],
        [-0.2848, -0.2319]], device='cuda:0')
pred: [1 1 0 1 1 0 0 1 1 0 0 0 0 0 1 0 1 1 1 0 0 0 1 0 1 0 1 1 0 0 0 1]
real: 

 10%|▉         | 5/52 [00:17<02:37,  3.36s/it]

logits:
tensor([[-0.6720,  0.6505],
        [ 0.2047, -1.0013],
        [-0.1893, -0.2360],
        [ 0.0273, -0.7019],
        [-0.3562,  0.0190],
        [-0.1003, -0.4529],
        [-0.4290,  0.1151],
        [-0.0065, -0.6842],
        [-0.0332, -0.6703],
        [-0.1150, -0.3863],
        [-0.0954, -0.4333],
        [ 0.4283, -1.3133],
        [-0.1670, -0.2901],
        [ 0.2878, -1.1780],
        [ 0.3416, -1.3246],
        [ 0.2675, -1.1147],
        [ 0.3625, -1.2420],
        [-0.4905,  0.3117],
        [-0.5759,  0.4742],
        [-0.6453,  0.5545],
        [ 0.2718, -1.2108],
        [-0.0838, -0.4905],
        [-0.3198, -0.0166],
        [-0.4724,  0.1789],
        [-0.3353, -0.0858],
        [-0.3803,  0.0586],
        [-0.0777, -0.4964],
        [ 0.1158, -0.9824],
        [ 0.2286, -1.1289],
        [-0.3365, -0.0180],
        [-0.0977, -0.3824],
        [ 0.4382, -1.4741]], device='cuda:0')
pred: [1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 1 0 0 0 1 0 0]
real: 

 12%|█▏        | 6/52 [00:20<02:32,  3.32s/it]

logits:
tensor([[ 0.1834, -1.0147],
        [-0.0835, -0.4816],
        [-0.1777, -0.4208],
        [-0.5407,  0.4513],
        [-0.5044,  0.3361],
        [-0.0415, -0.4398],
        [-0.3133,  0.0051],
        [-0.6252,  0.5832],
        [ 0.0525, -0.6324],
        [-0.0112, -0.5755],
        [ 0.5223, -1.6334],
        [ 0.0848, -0.7206],
        [ 0.1380, -0.9281],
        [-0.0853, -0.4622],
        [-0.2707, -0.0783],
        [ 0.2823, -1.2302],
        [ 0.3663, -1.3548],
        [-0.6603,  0.5931],
        [ 0.4773, -1.5471],
        [ 0.3296, -1.2708],
        [ 0.2341, -1.1840],
        [ 0.2121, -1.0775],
        [-0.3444, -0.0254],
        [-0.3665,  0.0844],
        [ 0.2599, -1.0970],
        [ 0.5880, -1.6800],
        [ 0.2761, -1.1254],
        [ 0.5038, -1.6013],
        [-0.6293,  0.5896],
        [-0.6349,  0.5718],
        [ 0.2471, -1.1566],
        [ 0.6250, -1.7276]], device='cuda:0')
pred: [0 0 0 1 1 0 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0]
real: 

 13%|█▎        | 7/52 [00:23<02:30,  3.35s/it]

logits:
tensor([[-0.1966, -0.2195],
        [-0.1853, -0.2990],
        [-0.1520, -0.2842],
        [-0.3545,  0.0870],
        [-0.5913,  0.5014],
        [-0.1420, -0.3591],
        [ 0.0109, -0.6698],
        [-0.4400,  0.2684],
        [ 0.1726, -1.0495],
        [-0.2359, -0.2133],
        [ 0.1952, -1.0084],
        [-0.2368, -0.1112],
        [-0.0875, -0.4490],
        [-0.4054,  0.0207],
        [-0.4224, -0.0515],
        [ 0.2668, -1.2532],
        [ 0.3356, -1.2644],
        [ 0.3274, -1.3945],
        [-0.0060, -0.7527],
        [-0.5990,  0.3930],
        [-0.1913, -0.3471],
        [-0.5718,  0.4382],
        [-0.3869,  0.1406],
        [ 0.2975, -1.1808],
        [-0.4053,  0.1091],
        [-0.4378,  0.2090],
        [-0.5452,  0.3886],
        [-0.2062, -0.2906],
        [ 0.3686, -1.2200],
        [-0.3245, -0.0121],
        [-0.6180,  0.5851],
        [ 0.3268, -1.2895]], device='cuda:0')
pred: [0 0 0 1 1 0 0 1 0 1 0 1 0 1 1 0 0 0 0 1 0 1 1 0 1 1 1 0 0 1 1 0]
real: 

 15%|█▌        | 8/52 [00:27<02:29,  3.39s/it]

logits:
tensor([[ 0.0602, -0.9039],
        [ 0.2546, -1.1284],
        [-0.6799,  0.7052],
        [-0.3826,  0.0700],
        [ 0.5633, -1.7248],
        [-0.1764, -0.3549],
        [-0.4135,  0.1094],
        [-0.0932, -0.5305],
        [-0.1923, -0.2363],
        [ 0.4654, -1.4063],
        [-0.5981,  0.5278],
        [-0.3993,  0.1306],
        [-0.5537,  0.4242],
        [-0.1552, -0.2145],
        [-0.0758, -0.5593],
        [ 0.2333, -0.9467],
        [ 0.3313, -1.4024],
        [-0.0369, -0.5486],
        [ 0.2922, -1.2865],
        [ 0.4574, -1.6318],
        [ 0.3766, -1.3508],
        [ 0.4324, -1.4316],
        [-0.4798,  0.3584],
        [-0.0318, -0.5185],
        [-0.5078,  0.2795],
        [ 0.1466, -0.8775],
        [-0.0504, -0.4867],
        [ 0.4996, -1.6026],
        [ 0.5407, -1.6151],
        [ 0.2035, -0.9228],
        [-0.4722,  0.2596],
        [ 0.0224, -0.6884]], device='cuda:0')
pred: [0 0 1 1 0 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0]
real: 

 17%|█▋        | 9/52 [00:30<02:29,  3.47s/it]

logits:
tensor([[-0.3442, -0.0772],
        [ 0.0028, -0.6673],
        [-0.5714,  0.4280],
        [ 0.3631, -1.2220],
        [-0.3288, -0.0550],
        [-0.1415, -0.3615],
        [-0.1929, -0.2903],
        [ 0.4962, -1.5432],
        [-0.0301, -0.5251],
        [ 0.6521, -1.7076],
        [-0.5139,  0.2119],
        [ 0.2070, -0.9985],
        [-0.2844, -0.1548],
        [-0.1140, -0.3881],
        [ 0.0525, -0.7235],
        [-0.5479,  0.3765],
        [-0.1864, -0.3257],
        [-0.4285,  0.2463],
        [-0.4008,  0.2017],
        [-0.2324, -0.2615],
        [ 0.3411, -1.4260],
        [ 0.4258, -1.5919],
        [-0.4214,  0.1200],
        [-0.2872, -0.1515],
        [-0.5644,  0.4680],
        [ 0.6699, -1.8717],
        [-0.1069, -0.5244],
        [ 0.1547, -0.9909],
        [-0.1514, -0.4272],
        [ 0.5585, -1.6909],
        [-0.4795,  0.3263],
        [ 0.3888, -1.3203]], device='cuda:0')
pred: [1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 1 0 1 1 0 0 0 1 1 1 0 0 0 0 0 1 0]
real: 

 19%|█▉        | 10/52 [00:34<02:26,  3.50s/it]

logits:
tensor([[-0.3008, -0.0976],
        [-0.3832,  0.0223],
        [-0.1526, -0.3288],
        [ 0.4278, -1.4088],
        [ 0.4047, -1.4441],
        [ 0.6152, -1.6811],
        [ 0.2700, -1.1221],
        [-0.1596, -0.2791],
        [-0.3342,  0.0592],
        [-0.1885, -0.2966],
        [-0.6365,  0.5433],
        [-0.6076,  0.5673],
        [-0.4149,  0.0966],
        [ 0.0569, -0.8356],
        [-0.0337, -0.5973],
        [-0.3853,  0.1045],
        [-0.5893,  0.5322],
        [-0.6114,  0.5846],
        [ 0.0339, -0.5955],
        [-0.4086,  0.1827],
        [ 0.3604, -1.3483],
        [ 0.3514, -1.3347],
        [-0.1123, -0.5016],
        [-0.2867, -0.0788],
        [-0.4347,  0.1104],
        [-0.0476, -0.5630],
        [ 0.4647, -1.4781],
        [-0.1043, -0.4372],
        [-0.2827, -0.0696],
        [-0.4795,  0.2790],
        [-0.3758,  0.0355],
        [-0.3144, -0.0265]], device='cuda:0')
pred: [1 1 0 0 0 0 0 0 1 0 1 1 1 0 0 1 1 1 0 1 0 0 0 1 1 0 0 0 1 1 1 1]
real: 

 21%|██        | 11/52 [00:37<02:23,  3.49s/it]

logits:
tensor([[-0.4109,  0.1273],
        [-0.4287,  0.0727],
        [ 0.5556, -1.7524],
        [ 0.0627, -0.7824],
        [-0.5594,  0.4679],
        [-0.4036,  0.0963],
        [ 0.1135, -0.8828],
        [ 0.1435, -0.7943],
        [-0.2699, -0.1294],
        [-0.2998,  0.0053],
        [-0.3972,  0.1278],
        [-0.1677, -0.4293],
        [ 0.3032, -1.2164],
        [-0.2386, -0.1292],
        [ 0.4805, -1.4750],
        [-0.5579,  0.4955],
        [-0.1804, -0.2130],
        [-0.0493, -0.5926],
        [-0.2712, -0.0965],
        [-0.2804, -0.0872],
        [-0.0733, -0.4765],
        [-0.3455,  0.0887],
        [ 0.3812, -1.4423],
        [-0.5797,  0.5145],
        [ 0.0362, -0.6148],
        [-0.5126,  0.3711],
        [ 0.5060, -1.5526],
        [-0.0258, -0.4342],
        [-0.0103, -0.7231],
        [-0.4185,  0.1140],
        [-0.2452, -0.2313],
        [-0.4009,  0.0913]], device='cuda:0')
pred: [1 1 0 0 1 1 0 0 1 1 1 0 0 1 0 1 0 0 1 1 0 1 0 1 0 1 0 0 0 1 1 1]
real: 

 23%|██▎       | 12/52 [00:41<02:19,  3.49s/it]

logits:
tensor([[ 0.1424, -0.8609],
        [ 0.4876, -1.4688],
        [-0.4834,  0.3432],
        [-0.2011, -0.2423],
        [ 0.0403, -0.8141],
        [ 0.2288, -1.0913],
        [-0.3876,  0.0758],
        [ 0.0777, -0.8246],
        [-0.1699, -0.3476],
        [ 0.4268, -1.3409],
        [-0.2872, -0.0431],
        [-0.3525,  0.0759],
        [-0.4938,  0.1816],
        [ 0.4541, -1.5874],
        [-0.4258,  0.1259],
        [-0.6048,  0.5883],
        [-0.3989,  0.0076],
        [ 0.1824, -1.0717],
        [-0.1172, -0.4365],
        [-0.2603, -0.1469],
        [-0.6177,  0.5378],
        [ 0.3254, -1.2519],
        [-0.4006,  0.1008],
        [-0.1108, -0.4685],
        [-0.1732, -0.2748],
        [ 0.6111, -1.7448],
        [-0.0049, -0.5750],
        [-0.6042,  0.6183],
        [ 0.0393, -0.7277],
        [-0.1038, -0.3498],
        [-0.4792,  0.3241],
        [-0.5914,  0.5113]], device='cuda:0')
pred: [0 0 1 0 0 0 1 0 0 0 1 1 1 0 1 1 1 0 0 1 1 0 1 0 0 0 0 1 0 0 1 1]
real: 

 25%|██▌       | 13/52 [00:44<02:15,  3.47s/it]

logits:
tensor([[-0.2463, -0.1736],
        [ 0.1775, -1.1099],
        [-0.5338,  0.3169],
        [-0.1196, -0.3740],
        [-0.2045, -0.3216],
        [ 0.5509, -1.6195],
        [ 0.5522, -1.4964],
        [-0.1873, -0.2879],
        [-0.0416, -0.5601],
        [-0.2990, -0.0365],
        [-0.0900, -0.4023],
        [-0.4432,  0.2838],
        [-0.0060, -0.5569],
        [-0.4847,  0.3182],
        [-0.3222, -0.0362],
        [-0.4420,  0.2498],
        [-0.1560, -0.3514],
        [ 0.3256, -1.2101],
        [-0.3421, -0.0682],
        [ 0.0659, -0.6607],
        [ 0.3841, -1.3591],
        [-0.4180,  0.1752],
        [-0.2808, -0.1080],
        [-0.5119,  0.3657],
        [-0.2895, -0.0566],
        [ 0.2577, -0.9568],
        [ 0.1621, -0.8751],
        [-0.3099, -0.0498],
        [-0.1110, -0.5571],
        [ 0.2417, -1.0467],
        [-0.0589, -0.5888],
        [-0.4905,  0.2142]], device='cuda:0')
pred: [1 0 1 0 0 0 0 0 0 1 0 1 0 1 1 1 0 0 1 0 0 1 1 1 1 0 0 1 0 0 0 1]
real: 

 27%|██▋       | 14/52 [00:48<02:11,  3.46s/it]

logits:
tensor([[ 0.0302, -0.6813],
        [ 0.5456, -1.7721],
        [-0.1636, -0.3604],
        [-0.1803, -0.2566],
        [-0.1287, -0.3228],
        [ 0.4311, -1.4788],
        [-0.2917, -0.1189],
        [-0.4378,  0.1240],
        [ 0.4368, -1.5391],
        [ 0.1363, -0.9267],
        [-0.3863,  0.1324],
        [ 0.0145, -0.8082],
        [ 0.1471, -1.0225],
        [-0.0885, -0.3569],
        [-0.3805,  0.0924],
        [-0.1204, -0.5670],
        [ 0.5806, -1.8301],
        [ 0.3878, -1.2433],
        [-0.5994,  0.5667],
        [ 0.5074, -1.5428],
        [-0.2934, -0.0624],
        [ 0.0069, -0.5997],
        [-0.1975, -0.1717],
        [ 0.0456, -0.7312],
        [ 0.4178, -1.3410],
        [ 0.6189, -1.6894],
        [ 0.7316, -1.9236],
        [-0.3838,  0.0879],
        [ 0.3011, -1.3985],
        [-0.4036,  0.0050],
        [ 0.0236, -0.7287],
        [-0.5104,  0.3361]], device='cuda:0')
pred: [0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 1 0 1 0 1]
real: 

 29%|██▉       | 15/52 [00:51<02:08,  3.47s/it]

logits:
tensor([[-0.6492,  0.5420],
        [ 0.4462, -1.4504],
        [-0.6100,  0.5161],
        [-0.4462,  0.2892],
        [-0.6301,  0.5453],
        [ 0.0726, -0.8458],
        [-0.5162,  0.2954],
        [-0.3273, -0.1151],
        [ 0.2343, -1.1196],
        [-0.5945,  0.5530],
        [ 0.0718, -0.8936],
        [ 0.5550, -1.6131],
        [-0.3442, -0.0772],
        [-0.4686,  0.1963],
        [ 0.1756, -0.8779],
        [ 0.2934, -1.0726],
        [-0.2840, -0.0531],
        [ 0.4369, -1.4461],
        [ 0.0142, -0.7759],
        [ 0.1664, -0.8804],
        [-0.5981,  0.4160],
        [-0.1641, -0.4284],
        [ 0.3767, -1.4401],
        [-0.5480,  0.3929],
        [-0.5561,  0.4474],
        [-0.2691, -0.0689],
        [ 0.0301, -0.6869],
        [-0.1549, -0.4008],
        [-0.0569, -0.5441],
        [-0.1764, -0.3947],
        [ 0.4521, -1.4523],
        [-0.1086, -0.4151]], device='cuda:0')
pred: [1 0 1 1 1 0 1 1 0 1 0 0 1 1 0 0 1 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0]
real: 

 31%|███       | 16/52 [00:55<02:04,  3.45s/it]

logits:
tensor([[-9.2806e-02, -3.9159e-01],
        [ 1.7190e-01, -1.0693e+00],
        [-5.2416e-01,  3.7492e-01],
        [-3.5322e-01, -3.7233e-02],
        [-4.8313e-01,  3.2210e-01],
        [-5.1810e-02, -5.4365e-01],
        [ 2.8528e-01, -1.1769e+00],
        [ 5.2751e-01, -1.5834e+00],
        [ 3.8829e-01, -1.3888e+00],
        [ 1.1173e-01, -9.4957e-01],
        [-3.6121e-01,  1.5324e-01],
        [-5.9604e-01,  5.2882e-01],
        [-5.9648e-01,  5.4543e-01],
        [-7.5362e-04, -6.8218e-01],
        [ 7.3780e-02, -7.3588e-01],
        [ 4.5029e-01, -1.4652e+00],
        [ 2.8436e-01, -1.2994e+00],
        [-8.7638e-04, -6.4719e-01],
        [-3.6396e-01,  7.4841e-02],
        [ 1.2846e-01, -9.7172e-01],
        [-7.5248e-02, -6.1831e-01],
        [ 1.0492e-01, -8.4074e-01],
        [-5.1161e-01,  2.6393e-01],
        [ 3.9638e-01, -1.4702e+00],
        [ 1.1765e-01, -8.5788e-01],
        [ 9.6080e-02, -9.1273e-01],
        [-5.1937e-01,  3.5515e-01],
        [-4.4359e-01

 33%|███▎      | 17/52 [00:58<02:01,  3.49s/it]

logits:
tensor([[-0.5881,  0.5250],
        [ 0.1774, -1.0204],
        [-0.4531,  0.2392],
        [-0.3260,  0.0027],
        [-0.0041, -0.6870],
        [ 0.1992, -0.8146],
        [-0.3789,  0.0978],
        [ 0.1505, -0.9548],
        [-0.3122,  0.0138],
        [ 0.4276, -1.4459],
        [ 0.1211, -0.9053],
        [ 0.2084, -1.0641],
        [ 0.2794, -1.1546],
        [-0.3385, -0.0605],
        [ 0.5477, -1.6724],
        [-0.4862,  0.3777],
        [-0.4548,  0.2455],
        [-0.1766, -0.2471],
        [-0.1432, -0.3434],
        [-0.0536, -0.4408],
        [-0.1784, -0.2741],
        [ 0.1946, -1.0198],
        [-0.6553,  0.7015],
        [-0.0241, -0.6215],
        [-0.3321, -0.0862],
        [ 0.4009, -1.2800],
        [ 0.3108, -1.1994],
        [-0.0852, -0.5121],
        [-0.3285,  0.0473],
        [-0.0293, -0.6249],
        [-0.6415,  0.6453],
        [-0.1718, -0.3071]], device='cuda:0')
pred: [1 0 1 1 0 0 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 0 1 0 1 0]
real: 

 35%|███▍      | 18/52 [01:01<01:55,  3.39s/it]

logits:
tensor([[-0.6268,  0.5487],
        [ 0.7143, -1.8632],
        [-0.4141,  0.0583],
        [-0.3714,  0.0950],
        [ 0.5258, -1.5893],
        [ 0.5993, -1.6438],
        [ 0.5373, -1.6610],
        [-0.5334,  0.4192],
        [-0.5089,  0.3942],
        [-0.6004,  0.4671],
        [-0.4326,  0.2489],
        [-0.5356,  0.3906],
        [ 0.4400, -1.5567],
        [-0.0899, -0.4527],
        [ 0.1952, -0.9469],
        [-0.6419,  0.5942],
        [-0.3854,  0.1148],
        [ 0.1083, -0.7499],
        [ 0.2284, -1.0097],
        [-0.3368,  0.0279],
        [ 0.5149, -1.5549],
        [ 0.2968, -1.1460],
        [ 0.1586, -0.9750],
        [-0.2198, -0.1876],
        [-0.4548,  0.2531],
        [ 0.4551, -1.4552],
        [-0.3946,  0.1481],
        [ 0.1884, -1.0790],
        [ 0.2113, -1.0150],
        [-0.1509, -0.2824],
        [-0.0498, -0.5587],
        [-0.0900, -0.4744]], device='cuda:0')
pred: [1 0 1 1 0 0 0 1 1 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 0 0 0 0 0]
real: 

 37%|███▋      | 19/52 [01:04<01:49,  3.33s/it]

logits:
tensor([[-0.3127,  0.0168],
        [ 0.0312, -0.7505],
        [-0.2366, -0.2937],
        [-0.1463, -0.4772],
        [-0.5359,  0.3970],
        [-0.2138, -0.2391],
        [ 0.2682, -1.0499],
        [-0.4456,  0.2223],
        [-0.4150,  0.1536],
        [-0.2802, -0.0938],
        [-0.0445, -0.5515],
        [ 0.8101, -1.9369],
        [-0.3757,  0.0753],
        [-0.0993, -0.5305],
        [ 0.1852, -1.0596],
        [-0.4413,  0.1173],
        [-0.0950, -0.4606],
        [ 0.5940, -1.7329],
        [ 0.3872, -1.3063],
        [-0.6512,  0.6594],
        [-0.2422, -0.1711],
        [ 0.3156, -1.2668],
        [ 0.4806, -1.5636],
        [ 0.6479, -1.8844],
        [-0.0733, -0.4777],
        [-0.1427, -0.4694],
        [ 0.4345, -1.4961],
        [-0.1666, -0.3118],
        [-0.1302, -0.4104],
        [-0.2019, -0.2084],
        [-0.0949, -0.5032],
        [-0.3841,  0.1552]], device='cuda:0')
pred: [1 0 0 0 1 0 0 1 1 1 0 0 1 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1]
real: 

 38%|███▊      | 20/52 [01:08<01:45,  3.30s/it]

logits:
tensor([[-0.1473, -0.4098],
        [ 0.0667, -0.7921],
        [-0.3053, -0.0607],
        [ 0.5281, -1.6488],
        [-0.4746,  0.2601],
        [ 0.3201, -1.2758],
        [ 0.0404, -0.8185],
        [-0.0767, -0.4325],
        [-0.0514, -0.4138],
        [-0.3043, -0.1238],
        [-0.1147, -0.4637],
        [-0.4624,  0.2657],
        [ 0.0709, -0.7056],
        [ 0.4738, -1.5149],
        [-0.5204,  0.3382],
        [-0.4852,  0.2778],
        [-0.1445, -0.2634],
        [ 0.0608, -0.8057],
        [ 0.4634, -1.4141],
        [ 0.7042, -2.0217],
        [ 0.0945, -0.7215],
        [-0.3669,  0.1306],
        [ 0.1814, -0.9309],
        [-0.4786,  0.2051],
        [ 0.7060, -1.8534],
        [-0.0852, -0.5348],
        [-0.5517,  0.3978],
        [-0.1178, -0.5530],
        [-0.5991,  0.5592],
        [-0.2584, -0.0795],
        [ 0.4231, -1.4386],
        [-0.2880, -0.0769]], device='cuda:0')
pred: [0 0 1 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 1 1 0 1]
real: 

 40%|████      | 21/52 [01:11<01:41,  3.28s/it]

logits:
tensor([[ 0.3580, -1.2743],
        [ 0.1641, -0.9063],
        [ 0.4700, -1.5731],
        [-0.5632,  0.4183],
        [ 0.1128, -0.7589],
        [ 0.5406, -1.5226],
        [-0.3554, -0.0174],
        [-0.5477,  0.3927],
        [-0.1416, -0.4001],
        [ 0.6084, -1.7044],
        [-0.4916,  0.3113],
        [ 0.3344, -1.3257],
        [ 0.3172, -1.3842],
        [ 0.7047, -1.8720],
        [ 0.4185, -1.4877],
        [-0.5518,  0.4284],
        [-0.3934,  0.1226],
        [-0.2958, -0.0543],
        [-0.0347, -0.5893],
        [ 0.0807, -0.7579],
        [ 0.0723, -0.8096],
        [ 0.5486, -1.6598],
        [ 0.0654, -0.8295],
        [-0.5488,  0.4929],
        [ 0.3097, -1.2985],
        [-0.5455,  0.4685],
        [ 0.5763, -1.6635],
        [-0.0381, -0.6538],
        [-0.3878,  0.1588],
        [ 0.1444, -0.9806],
        [-0.1629, -0.2941],
        [-0.1566, -0.3505]], device='cuda:0')
pred: [0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0]
real: 

 42%|████▏     | 22/52 [01:14<01:38,  3.28s/it]

logits:
tensor([[ 0.1357, -0.9869],
        [ 0.1097, -0.8440],
        [-0.0097, -0.7104],
        [ 0.3936, -1.2622],
        [-0.3708,  0.0054],
        [ 0.1758, -0.8997],
        [-0.4431,  0.2191],
        [-0.5980,  0.5532],
        [ 0.3855, -1.3696],
        [ 0.0230, -0.6804],
        [-0.2665, -0.1442],
        [-0.0452, -0.6592],
        [ 0.4405, -1.3849],
        [-0.1079, -0.4136],
        [ 0.0253, -0.6987],
        [ 0.0386, -0.7362],
        [-0.4193,  0.1580],
        [-0.3221, -0.0052],
        [ 0.0100, -0.6758],
        [ 0.0748, -0.8427],
        [-0.5574,  0.3943],
        [ 0.4512, -1.4844],
        [-0.3623,  0.0962],
        [-0.1586, -0.4467],
        [ 0.0780, -0.7347],
        [ 0.6520, -1.8064],
        [-0.5941,  0.5438],
        [ 0.4883, -1.4485],
        [-0.2253, -0.1061],
        [ 0.0151, -0.6058],
        [ 0.6299, -1.7752],
        [ 0.2107, -1.0156]], device='cuda:0')
pred: [0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 1 0 0 0]
real: 

 44%|████▍     | 23/52 [01:17<01:34,  3.27s/it]

logits:
tensor([[-0.0191, -0.6011],
        [-0.2261, -0.2770],
        [ 0.2834, -1.1906],
        [ 0.3933, -1.3473],
        [-0.3391,  0.0240],
        [-0.6337,  0.5573],
        [ 0.0451, -0.7446],
        [ 0.1964, -0.9386],
        [-0.5006,  0.3471],
        [-0.2904,  0.0400],
        [ 0.2457, -1.0130],
        [-0.4353,  0.1942],
        [ 0.5311, -1.6441],
        [-0.1614, -0.2424],
        [-0.4807,  0.4092],
        [ 0.1033, -0.8624],
        [-0.4687,  0.3026],
        [ 0.3834, -1.2017],
        [ 0.4393, -1.5022],
        [ 0.4438, -1.4415],
        [-0.4589,  0.3082],
        [-0.3729,  0.0865],
        [-0.3326,  0.0906],
        [-0.2873, -0.1661],
        [-0.4388,  0.2608],
        [-0.1709, -0.3229],
        [ 0.6142, -1.7529],
        [-0.0811, -0.4386],
        [ 0.5104, -1.5499],
        [-0.0110, -0.7025],
        [-0.1744, -0.3721],
        [ 0.0882, -0.7792]], device='cuda:0')
pred: [0 0 0 0 1 1 0 0 1 1 0 1 0 0 1 0 1 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0]
real: 

 46%|████▌     | 24/52 [01:21<01:32,  3.29s/it]

logits:
tensor([[ 0.2918, -1.2263],
        [-0.6259,  0.5529],
        [-0.5374,  0.3596],
        [-0.1182, -0.3595],
        [-0.1882, -0.2217],
        [ 0.2762, -1.3464],
        [-0.5081,  0.2975],
        [-0.0731, -0.5460],
        [ 0.2805, -1.1480],
        [-0.2970, -0.0573],
        [ 0.5194, -1.4396],
        [-0.2139, -0.1635],
        [ 0.2064, -1.1803],
        [ 0.1550, -0.9174],
        [-0.1118, -0.3650],
        [-0.4763,  0.3078],
        [-0.0968, -0.5486],
        [-0.0253, -0.6725],
        [ 0.2236, -1.1039],
        [-0.1023, -0.4155],
        [-0.5729,  0.4174],
        [-0.4622,  0.1887],
        [-0.0480, -0.5786],
        [-0.5330,  0.3736],
        [-0.3022, -0.0633],
        [ 0.3912, -1.3464],
        [ 0.4835, -1.6624],
        [-0.6113,  0.5492],
        [ 0.1750, -1.0729],
        [-0.4890,  0.2581],
        [ 0.0523, -0.6067],
        [-0.5479,  0.3542]], device='cuda:0')
pred: [0 1 1 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 0 0 1 1 0 1 1 0 0 1 0 1 0 1]
real: 

 48%|████▊     | 25/52 [01:24<01:28,  3.27s/it]

logits:
tensor([[-0.3442,  0.0232],
        [ 0.3383, -1.2728],
        [-0.3614,  0.0813],
        [ 0.6575, -1.7842],
        [ 0.0465, -0.6076],
        [ 0.1655, -0.9463],
        [ 0.0370, -0.6159],
        [-0.4923,  0.2922],
        [-0.2884, -0.1138],
        [ 0.5106, -1.5703],
        [ 0.6125, -1.8211],
        [ 0.4238, -1.4930],
        [-0.0592, -0.6070],
        [-0.5058,  0.2562],
        [ 0.2618, -1.2322],
        [-0.2388, -0.2248],
        [ 0.1089, -0.9299],
        [-0.5899,  0.5376],
        [-0.2909, -0.1714],
        [-0.0949, -0.4037],
        [-0.5927,  0.4157],
        [-0.5884,  0.5282],
        [-0.1785, -0.3345],
        [-0.3581,  0.1392],
        [ 0.0913, -0.8591],
        [-0.5960,  0.4822],
        [-0.4543,  0.1997],
        [-0.3565,  0.1376],
        [-0.1737, -0.5024],
        [-0.6500,  0.6131],
        [-0.1560, -0.1899],
        [-0.0843, -0.5056]], device='cuda:0')
pred: [1 0 1 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 1 0 1 1 0 1 0 1 1 1 0 1 0 0]
real: 

 50%|█████     | 26/52 [01:27<01:24,  3.26s/it]

logits:
tensor([[ 0.2332, -1.0158],
        [ 0.0708, -0.6080],
        [ 0.6041, -1.7322],
        [-0.6301,  0.5876],
        [ 0.2292, -1.1218],
        [ 0.0820, -0.7394],
        [ 0.0957, -0.7697],
        [-0.0421, -0.5231],
        [ 0.1120, -0.8116],
        [ 0.1129, -0.8851],
        [-0.4423,  0.1857],
        [-0.6182,  0.6079],
        [-0.3101, -0.1689],
        [-0.3468, -0.0211],
        [ 0.0822, -0.7680],
        [-0.5956,  0.4815],
        [-0.2495, -0.1830],
        [-0.6491,  0.5345],
        [ 0.4202, -1.5238],
        [-0.4405,  0.2798],
        [-0.5842,  0.5135],
        [ 0.1894, -1.0178],
        [-0.5797,  0.4775],
        [-0.1935, -0.2810],
        [-0.6707,  0.5726],
        [ 0.2285, -1.0291],
        [-0.4696,  0.3120],
        [ 0.2413, -1.1007],
        [-0.3126, -0.0085],
        [ 0.0356, -0.6768],
        [ 0.2232, -1.0916],
        [ 0.0889, -0.9856]], device='cuda:0')
pred: [0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 0]
real: 

 52%|█████▏    | 27/52 [01:30<01:21,  3.24s/it]

logits:
tensor([[ 0.4310, -1.5609],
        [-0.4672,  0.2186],
        [ 0.0025, -0.6209],
        [-0.1434, -0.3302],
        [-0.2366, -0.0834],
        [ 0.0497, -0.8664],
        [-0.1615, -0.2847],
        [ 0.5201, -1.5649],
        [ 0.5719, -1.6704],
        [-0.5279,  0.3465],
        [ 0.1885, -0.9289],
        [-0.1648, -0.3281],
        [-0.3709,  0.0215],
        [ 0.2226, -1.0025],
        [ 0.0848, -0.7788],
        [-0.1007, -0.4331],
        [ 0.4957, -1.5486],
        [-0.2439, -0.1432],
        [-0.3883,  0.0219],
        [ 0.0023, -0.6225],
        [ 0.3334, -1.1750],
        [-0.2948, -0.1001],
        [-0.5419,  0.3529],
        [-0.0422, -0.6218],
        [ 0.4414, -1.4749],
        [-0.3950,  0.0623],
        [ 0.5207, -1.6946],
        [-0.2109, -0.3264],
        [ 0.2253, -1.0554],
        [-0.5351,  0.3358],
        [-0.0023, -0.6856],
        [-0.2564, -0.2007]], device='cuda:0')
pred: [0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1 1 0 0 1 0 0 0 1 0 1]
real: 

 54%|█████▍    | 28/52 [01:34<01:18,  3.27s/it]

logits:
tensor([[ 0.2501, -1.1201],
        [-0.5272,  0.2936],
        [ 0.5711, -1.6000],
        [-0.0340, -0.5595],
        [-0.5491,  0.4528],
        [ 0.3989, -1.3632],
        [-0.4203,  0.1834],
        [-0.0829, -0.4119],
        [ 0.0680, -0.7406],
        [ 0.6091, -1.7853],
        [ 0.3690, -1.3181],
        [ 0.2257, -1.0710],
        [-0.0911, -0.4687],
        [-0.0465, -0.5870],
        [-0.2593, -0.1155],
        [-0.1537, -0.4471],
        [-0.1268, -0.3768],
        [-0.3247, -0.0753],
        [ 0.6941, -1.7268],
        [-0.1947, -0.2332],
        [-0.4973,  0.2592],
        [ 0.0821, -0.8418],
        [ 0.5365, -1.6448],
        [-0.6244,  0.5815],
        [-0.3703,  0.0073],
        [ 0.4306, -1.3893],
        [-0.5344,  0.3871],
        [-0.3538,  0.1517],
        [-0.1711, -0.3846],
        [ 0.2828, -1.1633],
        [ 0.2051, -1.1656],
        [ 0.2846, -1.0742]], device='cuda:0')
pred: [0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 1 0 1 1 0 0 0 0]
real: 

 56%|█████▌    | 29/52 [01:37<01:15,  3.29s/it]

logits:
tensor([[-0.1298, -0.4713],
        [ 0.0817, -0.7692],
        [-0.3913,  0.1335],
        [-0.0698, -0.5950],
        [-0.2529, -0.1403],
        [ 0.4182, -1.4539],
        [-0.2324, -0.2845],
        [ 0.1933, -1.0296],
        [ 0.4583, -1.6231],
        [ 0.1285, -0.8472],
        [ 0.0435, -0.8172],
        [ 0.0070, -0.7214],
        [ 0.1481, -0.8606],
        [-0.5454,  0.3772],
        [ 0.3670, -1.2784],
        [ 0.2711, -1.2015],
        [-0.6349,  0.5880],
        [-0.3738,  0.0550],
        [ 0.4246, -1.3438],
        [ 0.1818, -1.0145],
        [ 0.5578, -1.7555],
        [-0.1620, -0.3235],
        [ 0.1349, -0.8105],
        [ 0.2982, -1.1065],
        [ 0.0111, -0.7244],
        [ 0.5887, -1.6274],
        [ 0.4217, -1.3346],
        [-0.0032, -0.5241],
        [ 0.4995, -1.5485],
        [ 0.0575, -0.8993],
        [ 0.5791, -1.6435],
        [-0.3782,  0.0157]], device='cuda:0')
pred: [0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
real: 

 58%|█████▊    | 30/52 [01:40<01:11,  3.25s/it]

logits:
tensor([[ 0.7516, -2.0953],
        [-0.4522,  0.2236],
        [ 0.0380, -0.6509],
        [-0.3128, -0.0336],
        [ 0.4599, -1.5175],
        [-0.2898, -0.0972],
        [-0.0851, -0.4344],
        [ 0.3738, -1.3751],
        [-0.0888, -0.3978],
        [-0.4685,  0.2618],
        [ 0.5230, -1.6488],
        [ 0.4918, -1.5631],
        [ 0.1469, -0.9257],
        [-0.4407,  0.1425],
        [-0.4159,  0.0815],
        [-0.0440, -0.6546],
        [-0.1121, -0.3888],
        [-0.0906, -0.5331],
        [-0.3775,  0.1223],
        [-0.4792,  0.2402],
        [ 0.4373, -1.4649],
        [-0.4737,  0.2884],
        [-0.0681, -0.3347],
        [ 0.7158, -2.0767],
        [ 0.6043, -1.8072],
        [-0.0213, -0.5201],
        [-0.1047, -0.4966],
        [-0.5864,  0.5662],
        [-0.3230, -0.0279],
        [-0.1989, -0.2001],
        [ 0.0844, -0.8347],
        [ 0.3324, -1.3161]], device='cuda:0')
pred: [0 1 0 1 0 1 0 0 0 1 0 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 0]
real: 

 60%|█████▉    | 31/52 [01:44<01:09,  3.29s/it]

logits:
tensor([[-0.5680,  0.4057],
        [-0.4601,  0.2552],
        [ 0.4592, -1.5309],
        [ 0.4380, -1.4066],
        [-0.1749, -0.3420],
        [-0.5694,  0.4604],
        [ 0.2549, -1.0990],
        [ 0.3279, -1.2520],
        [ 0.3438, -1.1539],
        [-0.5456,  0.4000],
        [ 0.3197, -1.3004],
        [-0.3255, -0.0420],
        [ 0.0469, -0.8372],
        [-0.3747,  0.0235],
        [ 0.6746, -2.0347],
        [-0.5240,  0.3297],
        [-0.1708, -0.3660],
        [-0.0901, -0.3894],
        [-0.4339,  0.2082],
        [-0.4204,  0.1711],
        [ 0.3099, -1.2694],
        [-0.1695, -0.3913],
        [-0.6059,  0.5196],
        [-0.5061,  0.3721],
        [ 0.0049, -0.5587],
        [ 0.5772, -1.7196],
        [-0.1160, -0.4711],
        [ 0.5416, -1.7984],
        [-0.3308, -0.0978],
        [-0.4546,  0.2477],
        [-0.0406, -0.5389],
        [ 0.1235, -1.0055]], device='cuda:0')
pred: [1 1 0 0 0 1 0 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 1 0 0]
real: 

 62%|██████▏   | 32/52 [01:47<01:06,  3.32s/it]

logits:
tensor([[ 0.7436, -1.8901],
        [ 0.0800, -0.6678],
        [ 0.2304, -1.0445],
        [ 0.5340, -1.6327],
        [-0.3381, -0.0528],
        [ 0.2108, -1.0726],
        [-0.3231,  0.0081],
        [-0.4393,  0.1627],
        [-0.5767,  0.4589],
        [-0.0281, -0.6823],
        [ 0.0338, -0.6565],
        [-0.5110,  0.3931],
        [ 0.4133, -1.4673],
        [-0.5048,  0.3963],
        [ 0.5742, -1.7828],
        [ 0.0434, -0.8708],
        [-0.2239, -0.1689],
        [-0.3267, -0.0731],
        [-0.5229,  0.4191],
        [-0.3931,  0.0669],
        [-0.3594,  0.0263],
        [-0.6312,  0.5716],
        [-0.4726,  0.2973],
        [ 0.2973, -1.2044],
        [ 0.3581, -1.3671],
        [-0.4454,  0.2357],
        [-0.0485, -0.4712],
        [-0.0752, -0.4728],
        [ 0.4572, -1.4264],
        [-0.5051,  0.2594],
        [ 0.4157, -1.2992],
        [-0.5018,  0.2239]], device='cuda:0')
pred: [0 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 1 1 1 1 1 1 1 0 0 1 0 0 0 1 0 1]
real: 

 63%|██████▎   | 33/52 [01:50<01:02,  3.31s/it]

logits:
tensor([[ 4.2570e-01, -1.4020e+00],
        [ 1.9031e-03, -6.3299e-01],
        [-5.0383e-01,  2.1521e-01],
        [-5.5906e-01,  5.4411e-01],
        [ 5.1596e-01, -1.5964e+00],
        [-4.0506e-01,  7.4745e-02],
        [-6.2322e-01,  5.4662e-01],
        [ 4.0283e-01, -1.4082e+00],
        [ 6.0249e-01, -1.6839e+00],
        [ 5.4771e-01, -1.5510e+00],
        [ 3.5829e-01, -1.2832e+00],
        [-4.1779e-01,  1.3242e-01],
        [-4.8847e-01,  2.0345e-01],
        [ 2.1171e-01, -1.0959e+00],
        [-6.3067e-01,  6.2119e-01],
        [-5.5279e-01,  4.7077e-01],
        [-6.4503e-01,  5.8985e-01],
        [-1.6544e-01, -3.6324e-01],
        [ 2.5212e-01, -1.1848e+00],
        [-5.7999e-01,  4.6490e-01],
        [ 6.2219e-01, -1.7558e+00],
        [ 1.3412e-01, -8.7805e-01],
        [ 4.3938e-01, -1.5315e+00],
        [ 7.1836e-01, -1.9204e+00],
        [ 4.4200e-02, -7.9302e-01],
        [ 5.3650e-01, -1.6225e+00],
        [ 1.3112e-01, -9.1110e-01],
        [-4.5333e-01

 65%|██████▌   | 34/52 [01:54<00:59,  3.29s/it]

logits:
tensor([[ 0.3157, -1.2656],
        [-0.4861,  0.3096],
        [-0.1470, -0.3088],
        [ 0.2707, -1.2144],
        [-0.3259, -0.0497],
        [-0.2219, -0.2601],
        [ 0.0441, -0.7101],
        [ 0.0397, -0.7164],
        [ 0.0855, -0.8385],
        [ 0.3065, -1.1228],
        [ 0.4349, -1.6257],
        [-0.2026, -0.1535],
        [-0.5215,  0.3478],
        [-0.5138,  0.3550],
        [ 0.2030, -1.0712],
        [-0.6385,  0.5549],
        [ 0.6803, -1.7193],
        [-0.6199,  0.5695],
        [ 0.0769, -0.7855],
        [-0.2342, -0.3059],
        [-0.2013, -0.2669],
        [ 0.6645, -1.8137],
        [-0.4739,  0.2265],
        [-0.1455, -0.3431],
        [-0.2041, -0.2104],
        [ 0.3909, -1.3675],
        [-0.5953,  0.5142],
        [ 0.4006, -1.3538],
        [-0.3729, -0.0095],
        [-0.6071,  0.4280],
        [-0.4578,  0.1614],
        [-0.2139, -0.2138]], device='cuda:0')
pred: [0 1 0 0 1 0 0 0 0 0 0 1 1 1 0 1 0 1 0 0 0 0 1 0 0 0 1 0 1 1 1 1]
real: 

 67%|██████▋   | 35/52 [01:57<00:55,  3.26s/it]

logits:
tensor([[ 0.3842, -1.3297],
        [-0.0443, -0.5568],
        [-0.0524, -0.5579],
        [ 0.5641, -1.8186],
        [-0.0546, -0.5561],
        [ 0.3005, -1.3624],
        [-0.1744, -0.2243],
        [ 0.3595, -1.2067],
        [-0.3599,  0.1232],
        [-0.6093,  0.5167],
        [-0.1892, -0.2554],
        [-0.4724,  0.2970],
        [-0.1409, -0.3756],
        [-0.4844,  0.3675],
        [ 0.4799, -1.6445],
        [-0.5976,  0.4767],
        [-0.2380, -0.1062],
        [ 0.2442, -1.1556],
        [-0.4624,  0.2595],
        [ 0.3105, -1.1461],
        [-0.4876,  0.4150],
        [ 0.0646, -0.6774],
        [ 0.3578, -1.2613],
        [-0.0278, -0.6738],
        [-0.1859, -0.2107],
        [-0.5464,  0.3854],
        [ 0.0019, -0.4974],
        [-0.4995,  0.2854],
        [-0.5535,  0.4819],
        [-0.3505,  0.0650],
        [-0.5184,  0.4592],
        [ 0.4991, -1.5925]], device='cuda:0')
pred: [0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 1 1 0 1 0 1 0 0 0 0 1 0 1 1 1 1 0]
real: 

 69%|██████▉   | 36/52 [02:00<00:52,  3.28s/it]

logits:
tensor([[ 0.0417, -0.6359],
        [ 0.2382, -1.0527],
        [ 0.0793, -0.8188],
        [ 0.2752, -1.1411],
        [-0.2861, -0.0282],
        [ 0.6416, -1.8899],
        [ 0.2469, -1.1730],
        [-0.5767,  0.4483],
        [ 0.2018, -1.0143],
        [ 0.6429, -1.8203],
        [-0.5738,  0.3923],
        [ 0.2951, -1.3575],
        [-0.1222, -0.4142],
        [-0.6279,  0.6601],
        [-0.5040,  0.3724],
        [-0.3168, -0.0391],
        [ 0.0349, -0.7161],
        [ 0.5406, -1.6548],
        [-0.0194, -0.4801],
        [-0.3177,  0.0398],
        [-0.4980,  0.3476],
        [ 0.1308, -0.8920],
        [ 0.3340, -1.3942],
        [-0.1248, -0.4021],
        [-0.4043,  0.0574],
        [-0.2262, -0.1678],
        [-0.4650,  0.2556],
        [ 0.1739, -1.0605],
        [-0.2318, -0.1710],
        [ 0.7241, -1.9256],
        [-0.0557, -0.5427],
        [ 0.2005, -1.1105]], device='cuda:0')
pred: [0 0 0 0 1 0 0 1 0 0 1 0 0 1 1 1 0 0 0 1 1 0 0 0 1 1 1 0 1 0 0 0]
real: 

 71%|███████   | 37/52 [02:03<00:49,  3.27s/it]

logits:
tensor([[ 0.3494, -1.0618],
        [ 0.4209, -1.4748],
        [-0.4233,  0.0390],
        [ 0.0755, -0.7987],
        [ 0.1373, -0.9943],
        [ 0.0592, -0.7193],
        [-0.0172, -0.6459],
        [ 0.1705, -0.9974],
        [ 0.1758, -1.0114],
        [ 0.0727, -0.6458],
        [ 0.0292, -0.8030],
        [-0.4892,  0.2590],
        [-0.3769,  0.0710],
        [ 0.5209, -1.4550],
        [-0.3086,  0.0403],
        [ 0.3628, -1.1870],
        [-0.1793, -0.3604],
        [ 0.6637, -2.0150],
        [-0.0240, -0.6051],
        [-0.0372, -0.5565],
        [ 0.3390, -1.3140],
        [-0.1489, -0.3770],
        [-0.4561,  0.2888],
        [ 0.0387, -0.8237],
        [-0.3312,  0.0401],
        [-0.4000,  0.1231],
        [ 0.4577, -1.3339],
        [-0.4804,  0.2265],
        [ 0.0490, -0.8626],
        [-0.4599,  0.1242],
        [ 0.4971, -1.4948],
        [-0.3463,  0.0377]], device='cuda:0')
pred: [0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 1 0 1 1 0 1 0 1 0 1]
real: 

 73%|███████▎  | 38/52 [02:07<00:46,  3.29s/it]

logits:
tensor([[-0.0765, -0.4531],
        [ 0.2865, -1.0930],
        [-0.5222,  0.3344],
        [ 0.3865, -1.3681],
        [-0.1070, -0.4579],
        [ 0.2367, -1.1655],
        [-0.2169, -0.1688],
        [-0.1444, -0.4576],
        [ 0.3502, -1.3607],
        [-0.3845,  0.0707],
        [ 0.1067, -0.8057],
        [-0.2305, -0.1559],
        [-0.4328,  0.1382],
        [-0.4626,  0.2710],
        [ 0.0759, -0.7131],
        [-0.2555, -0.1905],
        [ 0.2338, -1.0795],
        [ 0.3918, -1.3887],
        [-0.0665, -0.5613],
        [ 0.2002, -0.9856],
        [ 0.2775, -1.2319],
        [-0.3163,  0.0393],
        [-0.2569, -0.1552],
        [-0.0724, -0.4659],
        [-0.5942,  0.5366],
        [-0.3237,  0.0240],
        [ 0.1807, -1.0227],
        [-0.5660,  0.5033],
        [-0.6114,  0.5530],
        [-0.1491, -0.3722],
        [ 0.5267, -1.5855],
        [ 0.0788, -0.7447]], device='cuda:0')
pred: [0 0 1 0 0 0 1 0 0 1 0 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 1 0 0 0]
real: 

 75%|███████▌  | 39/52 [02:10<00:42,  3.29s/it]

logits:
tensor([[ 0.1956, -1.0182],
        [ 0.0361, -0.6790],
        [-0.5472,  0.3616],
        [-0.3004, -0.1355],
        [ 0.2897, -1.1443],
        [ 0.0317, -0.7407],
        [-0.2610, -0.1113],
        [ 0.6375, -1.8900],
        [-0.4396,  0.2388],
        [ 0.0997, -0.7900],
        [ 0.6438, -1.9598],
        [-0.0587, -0.5698],
        [ 0.3129, -1.1715],
        [-0.4262,  0.1696],
        [ 0.3622, -1.2540],
        [-0.5903,  0.4619],
        [ 0.0454, -0.8253],
        [-0.5299,  0.3681],
        [-0.3940,  0.1749],
        [-0.4369,  0.2478],
        [-0.3480, -0.0656],
        [-0.0069, -0.6227],
        [ 0.4581, -1.5777],
        [ 0.1429, -0.8628],
        [-0.2326, -0.2866],
        [ 0.0336, -0.7253],
        [-0.4832,  0.1533],
        [ 0.2969, -1.2915],
        [-0.3714, -0.0024],
        [-0.2292, -0.1585],
        [-0.4653,  0.2756],
        [-0.1765, -0.4206]], device='cuda:0')
pred: [0 0 1 1 0 0 1 0 1 0 0 0 0 1 0 1 0 1 1 1 1 0 0 0 0 0 1 0 1 1 1 0]
real: 

 77%|███████▋  | 40/52 [02:13<00:39,  3.31s/it]

logits:
tensor([[-0.2679, -0.0757],
        [ 0.1952, -1.0682],
        [-0.6484,  0.6502],
        [ 0.2405, -0.9829],
        [-0.1420, -0.3932],
        [ 0.3739, -1.4160],
        [ 0.0808, -0.7464],
        [-0.4932,  0.3566],
        [-0.1139, -0.3591],
        [ 0.1346, -0.8236],
        [-0.4148,  0.1764],
        [ 0.1114, -0.9230],
        [-0.5930,  0.4738],
        [ 0.5376, -1.7542],
        [ 0.0342, -0.7674],
        [-0.4177,  0.0955],
        [ 0.0855, -0.8129],
        [-0.0660, -0.4162],
        [-0.4101,  0.2047],
        [ 0.1498, -0.9129],
        [ 0.6138, -1.7010],
        [-0.0737, -0.5283],
        [-0.1386, -0.3436],
        [-0.1100, -0.3924],
        [-0.3186,  0.0334],
        [-0.2054, -0.2293],
        [ 0.5255, -1.6245],
        [ 0.5661, -1.7580],
        [-0.1473, -0.3123],
        [-0.2396, -0.1527],
        [-0.3563, -0.0493],
        [-0.5896,  0.5747]], device='cuda:0')
pred: [1 0 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 1]
real: 

 79%|███████▉  | 41/52 [02:17<00:36,  3.32s/it]

logits:
tensor([[ 0.0090, -0.6642],
        [-0.0320, -0.6075],
        [-0.5368,  0.4214],
        [-0.3480,  0.0622],
        [-0.1906, -0.2518],
        [-0.4550,  0.1846],
        [-0.5638,  0.4259],
        [-0.5122,  0.2963],
        [-0.0713, -0.5493],
        [-0.4738,  0.3286],
        [-0.0108, -0.6648],
        [ 0.4969, -1.5781],
        [ 0.0480, -0.7753],
        [ 0.3671, -1.2943],
        [ 0.6614, -1.8318],
        [ 0.2133, -1.0278],
        [ 0.2807, -1.2620],
        [-0.4012,  0.1042],
        [-0.1696, -0.1935],
        [-0.3546,  0.0466],
        [-0.0156, -0.5754],
        [-0.5801,  0.4665],
        [ 0.1072, -0.7741],
        [-0.4254,  0.1571],
        [ 0.1958, -1.0348],
        [-0.0033, -0.6742],
        [ 0.5004, -1.6677],
        [-0.0351, -0.6870],
        [ 0.4640, -1.6072],
        [-0.5820,  0.4656],
        [ 0.6814, -1.8873],
        [ 0.6029, -1.8166]], device='cuda:0')
pred: [0 0 1 1 0 1 1 1 0 1 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 1 0 0]
real: 

 81%|████████  | 42/52 [02:20<00:33,  3.32s/it]

logits:
tensor([[-0.4368,  0.1170],
        [-0.5657,  0.5322],
        [ 0.2008, -1.0133],
        [ 0.0454, -0.8062],
        [ 0.5421, -1.6106],
        [ 0.3383, -1.2946],
        [-0.4474,  0.1605],
        [-0.3842,  0.0732],
        [ 0.6698, -1.7336],
        [-0.4101,  0.1899],
        [-0.5757,  0.4706],
        [-0.4989,  0.3611],
        [-0.1052, -0.4742],
        [-0.5227,  0.3353],
        [ 0.5561, -1.6995],
        [ 0.0929, -0.7339],
        [-0.5235,  0.3481],
        [ 0.2801, -1.2621],
        [-0.2831, -0.2258],
        [-0.2912, -0.2236],
        [ 0.0119, -0.7150],
        [ 0.1496, -1.0654],
        [-0.4399,  0.2731],
        [-0.5681,  0.4151],
        [-0.2135, -0.3086],
        [-0.3971,  0.1059],
        [ 0.7869, -2.1195],
        [ 0.2810, -1.1903],
        [-0.4335,  0.1716],
        [ 0.6332, -1.9523],
        [ 0.2559, -1.0524],
        [-0.0119, -0.7011]], device='cuda:0')
pred: [1 1 0 0 0 0 1 1 0 1 1 1 0 1 0 0 1 0 1 1 0 0 1 1 0 1 0 0 1 0 0 0]
real: 

 83%|████████▎ | 43/52 [02:23<00:29,  3.31s/it]

logits:
tensor([[ 2.8119e-01, -1.1677e+00],
        [-2.4409e-01, -1.8942e-01],
        [ 1.8316e-01, -9.7056e-01],
        [-2.5232e-01, -7.9470e-02],
        [ 3.6506e-01, -1.2272e+00],
        [-5.2059e-01,  4.0455e-01],
        [-2.9573e-01, -3.4410e-02],
        [ 1.2711e-03, -6.4449e-01],
        [-4.0389e-01,  9.0743e-02],
        [-3.4164e-01, -2.5720e-02],
        [ 3.8264e-01, -1.3523e+00],
        [ 2.8000e-01, -1.1953e+00],
        [-5.8294e-01,  5.0769e-01],
        [-2.5563e-01, -8.5544e-02],
        [-1.2393e-01, -2.9468e-01],
        [ 5.2806e-01, -1.4096e+00],
        [ 6.3130e-01, -1.9275e+00],
        [ 1.8877e-01, -1.0298e+00],
        [-1.5908e-01, -4.4060e-01],
        [ 1.3453e-01, -9.5795e-01],
        [ 2.2235e-01, -9.4844e-01],
        [-1.1472e-01, -3.6182e-01],
        [-4.3767e-01,  1.5408e-01],
        [ 6.2908e-02, -7.6116e-01],
        [-2.5475e-01, -1.1534e-01],
        [-3.6651e-01,  5.5857e-02],
        [-2.0825e-01, -2.3033e-01],
        [-5.4222e-01

 85%|████████▍ | 44/52 [02:27<00:26,  3.33s/it]

logits:
tensor([[-0.4000,  0.1710],
        [-0.0485, -0.4209],
        [-0.4211,  0.1821],
        [ 0.4762, -1.5466],
        [ 0.0304, -0.5780],
        [-0.3631,  0.0070],
        [-0.4014,  0.0342],
        [-0.1125, -0.4271],
        [-0.5426,  0.3249],
        [ 0.1298, -0.9817],
        [ 0.2092, -1.1142],
        [-0.4537,  0.2470],
        [ 0.4703, -1.5242],
        [-0.6161,  0.6193],
        [-0.4412,  0.2701],
        [-0.5087,  0.3772],
        [ 0.3974, -1.2984],
        [-0.3126, -0.0240],
        [-0.4955,  0.3588],
        [ 0.3360, -1.3636],
        [ 0.1971, -1.0612],
        [-0.5099,  0.3190],
        [ 0.5500, -1.6784],
        [-0.5469,  0.4075],
        [ 0.6438, -1.8741],
        [ 0.0679, -0.8857],
        [ 0.0163, -0.7340],
        [ 0.5225, -1.6513],
        [ 0.8148, -2.0876],
        [-0.4164,  0.1764],
        [ 0.3201, -1.1909],
        [ 0.0941, -0.9656]], device='cuda:0')
pred: [1 0 1 0 0 1 1 0 1 0 0 1 0 1 1 1 0 1 1 0 0 1 0 1 0 0 0 0 0 1 0 0]
real: 

 87%|████████▋ | 45/52 [02:30<00:23,  3.29s/it]

logits:
tensor([[ 1.8239e-01, -1.0133e+00],
        [-3.8707e-02, -6.5860e-01],
        [-5.6124e-01,  4.0331e-01],
        [ 5.3528e-01, -1.3829e+00],
        [ 1.5610e-01, -9.4811e-01],
        [-5.7889e-01,  4.8020e-01],
        [-2.9749e-02, -5.4868e-01],
        [ 3.1202e-01, -1.2893e+00],
        [-4.4936e-01,  2.0743e-01],
        [-1.2445e-01, -3.8655e-01],
        [-1.9964e-01, -2.1969e-01],
        [-6.1574e-01,  5.0392e-01],
        [-1.1203e-01, -4.1108e-01],
        [ 5.1027e-01, -1.6622e+00],
        [ 1.3883e-02, -6.8254e-01],
        [ 5.5376e-01, -1.7513e+00],
        [-5.9955e-01,  5.4532e-01],
        [-6.9251e-04, -6.6418e-01],
        [-5.1294e-01,  2.3845e-01],
        [-5.4431e-01,  5.0947e-01],
        [-4.7349e-01,  1.8719e-01],
        [ 1.2702e-02, -6.2882e-01],
        [-5.2667e-01,  4.3499e-01],
        [ 6.2869e-01, -1.6809e+00],
        [ 5.9341e-01, -1.7639e+00],
        [ 3.4086e-01, -1.3156e+00],
        [-4.2706e-01,  2.0082e-01],
        [ 1.5377e-01

 88%|████████▊ | 46/52 [02:33<00:19,  3.30s/it]

logits:
tensor([[-8.3905e-02, -4.8312e-01],
        [-5.7051e-01,  4.8513e-01],
        [-2.3770e-01, -9.3326e-02],
        [-6.4354e-01,  6.4297e-01],
        [-2.8808e-01, -9.6691e-02],
        [ 6.4918e-01, -1.8906e+00],
        [ 3.2382e-01, -1.2989e+00],
        [-5.2699e-01,  2.7112e-01],
        [-1.5019e-02, -5.7672e-01],
        [-4.9870e-01,  2.7893e-01],
        [ 6.7192e-01, -1.7980e+00],
        [-1.8095e-01, -2.0934e-01],
        [-4.3378e-01,  2.4011e-01],
        [ 4.5515e-01, -1.5744e+00],
        [ 5.9411e-01, -1.7922e+00],
        [ 5.1973e-01, -1.6996e+00],
        [ 3.1349e-02, -6.6347e-01],
        [-2.1346e-01, -2.9302e-01],
        [ 4.4982e-01, -1.6132e+00],
        [-6.8419e-01,  7.2215e-01],
        [ 4.5651e-02, -7.6652e-01],
        [ 5.2285e-01, -1.6542e+00],
        [-4.2845e-02, -5.2985e-01],
        [-5.4118e-01,  3.1260e-01],
        [-1.9974e-01, -2.4001e-01],
        [ 2.7446e-01, -1.1716e+00],
        [ 1.7114e-03, -6.9326e-01],
        [-4.8929e-01

 90%|█████████ | 47/52 [02:37<00:16,  3.33s/it]

logits:
tensor([[-0.3972,  0.0629],
        [-0.0173, -0.6065],
        [-0.3408, -0.0244],
        [ 0.0540, -0.8123],
        [ 0.3785, -1.2534],
        [-0.2455, -0.1951],
        [ 0.2403, -1.1012],
        [ 0.2042, -0.9967],
        [ 0.3597, -1.4235],
        [-0.3878,  0.0729],
        [ 0.2787, -1.1628],
        [-0.1713, -0.3902],
        [-0.5852,  0.3876],
        [-0.3019, -0.0191],
        [ 0.2525, -1.2635],
        [-0.5236,  0.3316],
        [-0.5800,  0.5220],
        [ 0.6249, -1.7321],
        [-0.4978,  0.3081],
        [ 0.3587, -1.3689],
        [-0.5281,  0.4038],
        [-0.3636,  0.0241],
        [ 0.5141, -1.5590],
        [-0.3370,  0.0044],
        [ 0.3579, -1.3064],
        [-0.0081, -0.6628],
        [ 0.3248, -1.2905],
        [-0.0215, -0.5590],
        [-0.3204, -0.0381],
        [ 0.5018, -1.5688],
        [ 0.0664, -0.7933],
        [-0.5732,  0.4415]], device='cuda:0')
pred: [1 0 1 0 0 1 0 0 0 1 0 0 1 1 0 1 1 0 1 0 1 1 0 1 0 0 0 0 1 0 0 1]
real: 

 92%|█████████▏| 48/52 [02:40<00:13,  3.30s/it]

logits:
tensor([[ 0.4107, -1.4300],
        [ 0.5276, -1.5359],
        [-0.1659, -0.3629],
        [-0.0568, -0.3669],
        [-0.5362,  0.2650],
        [-0.4613,  0.2453],
        [-0.1382, -0.4626],
        [ 0.3152, -1.1169],
        [ 0.5107, -1.5650],
        [-0.1106, -0.4566],
        [ 0.1403, -0.9221],
        [-0.2568, -0.1449],
        [-0.0757, -0.4784],
        [ 0.5196, -1.4154],
        [ 0.4203, -1.4974],
        [ 0.5391, -1.7854],
        [ 0.4661, -1.5437],
        [-0.2444, -0.2918],
        [-0.3454,  0.0021],
        [-0.5844,  0.4777],
        [-0.0821, -0.3221],
        [ 0.3386, -1.4013],
        [ 0.1252, -0.8089],
        [ 0.2984, -1.2289],
        [-0.2591, -0.0518],
        [ 0.0942, -0.6711],
        [-0.5986,  0.5089],
        [ 0.3096, -1.2824],
        [-0.4736,  0.3034],
        [-0.0847, -0.5188],
        [-0.4925,  0.3211],
        [-0.1871, -0.4044]], device='cuda:0')
pred: [0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 0 1 0]
real: 

 94%|█████████▍| 49/52 [02:43<00:09,  3.26s/it]

logits:
tensor([[-0.1984, -0.2541],
        [-0.0708, -0.4791],
        [ 0.5188, -1.6280],
        [-0.4435,  0.2834],
        [-0.2282, -0.2056],
        [ 0.0209, -0.7985],
        [ 0.3565, -1.4074],
        [ 0.2147, -1.1058],
        [ 0.1188, -0.8682],
        [ 0.3419, -1.1701],
        [ 0.6455, -1.8848],
        [-0.1175, -0.4760],
        [ 0.3558, -1.3461],
        [ 0.5279, -1.6255],
        [ 0.3106, -1.1986],
        [ 0.1711, -1.0302],
        [ 0.1383, -0.9494],
        [-0.2140, -0.1546],
        [-0.5398,  0.3766],
        [-0.4096,  0.1994],
        [-0.6471,  0.6199],
        [-0.3082,  0.1031],
        [-0.0467, -0.5863],
        [-0.0663, -0.5763],
        [-0.4216,  0.1410],
        [ 0.4773, -1.6385],
        [ 0.5043, -1.5409],
        [-0.3090, -0.0426],
        [-0.5184,  0.2921],
        [-0.2130, -0.3281],
        [-0.4041,  0.1051],
        [-0.0479, -0.5706]], device='cuda:0')
pred: [0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 1 0 0 1 1 0 1 0]
real: 

 96%|█████████▌| 50/52 [02:46<00:06,  3.26s/it]

logits:
tensor([[ 0.0954, -0.8676],
        [ 0.2210, -0.8975],
        [-0.0512, -0.5569],
        [ 0.5312, -1.6052],
        [-0.4619,  0.2646],
        [ 0.4715, -1.5159],
        [-0.4657,  0.2942],
        [-0.4671,  0.2997],
        [-0.4699,  0.3421],
        [-0.5962,  0.6170],
        [ 0.0182, -0.5943],
        [-0.2515, -0.1118],
        [-0.0163, -0.6494],
        [ 0.3723, -1.4274],
        [ 0.1845, -1.0061],
        [-0.5633,  0.4609],
        [-0.1548, -0.3139],
        [ 0.6702, -1.8692],
        [ 0.0204, -0.7708],
        [ 0.0315, -0.7391],
        [-0.1015, -0.5483],
        [-0.5867,  0.5549],
        [-0.3554, -0.0532],
        [ 0.1748, -1.0269],
        [ 0.1256, -0.9677],
        [ 0.1732, -1.0392],
        [-0.1207, -0.4845],
        [ 0.0634, -0.7387],
        [-0.0424, -0.5940],
        [ 0.3056, -1.2439],
        [ 0.1248, -0.8896],
        [ 0.1116, -0.7112]], device='cuda:0')
pred: [0 0 0 0 1 0 1 1 1 1 0 1 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0]
real: 

 98%|█████████▊| 51/52 [02:49<00:03,  3.23s/it]

logits:
tensor([[-0.5774,  0.5283],
        [ 0.4169, -1.3345],
        [ 0.2733, -1.2787],
        [-0.5743,  0.4177],
        [-0.3871,  0.0583],
        [-0.5413,  0.4476],
        [-0.4351,  0.2273],
        [ 0.4457, -1.4962],
        [-0.0616, -0.5145],
        [-0.5833,  0.3917],
        [ 0.1252, -0.7919],
        [ 0.5892, -1.8584],
        [-0.4215,  0.1664],
        [-0.6264,  0.5891],
        [-0.4277,  0.2489],
        [ 0.3709, -1.3164],
        [-0.1728, -0.2653],
        [ 0.3747, -1.5120],
        [ 0.0539, -0.6752],
        [ 0.0317, -0.7339],
        [ 0.2583, -1.1859],
        [-0.2541, -0.0715],
        [-0.1569, -0.3334],
        [ 0.2857, -1.2456],
        [ 0.2925, -1.1918],
        [ 0.1030, -0.9642],
        [ 0.3240, -1.3383],
        [-0.3455,  0.0069],
        [-0.5743,  0.4032],
        [-0.2658,  0.0204],
        [-0.0736, -0.4006],
        [ 0.2201, -1.1670]], device='cuda:0')
pred: [1 0 0 1 1 1 1 0 0 1 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0]
real: 

  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 52/52 [02:51<00:00,  3.30s/it]

logits:
tensor([[ 0.5025, -1.5477],
        [-0.0865, -0.4955],
        [ 0.2108, -1.1411],
        [-0.4233,  0.1535],
        [-0.4436,  0.2193],
        [ 0.4910, -1.6862],
        [-0.1616, -0.3629],
        [ 0.3728, -1.2294],
        [ 0.0827, -0.8174],
        [-0.3798,  0.0305],
        [ 0.0772, -0.8904],
        [-0.5074,  0.4236],
        [ 0.2073, -1.0136],
        [-0.2815, -0.0066],
        [ 0.3965, -1.3593]], device='cuda:0')
pred: [0 0 0 1 1 0 0 0 0 1 0 1 0 1 0]
real: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

	test loss: nan

	test acc: 0.6834935897435896

	test prec: 0.8781675322146743

	test rec: 0.6834935897435896

	test f1: 0.7418944145703331



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [24]:
print("pred_flat:", "\n", pred_flat)
print("\n")
print("logits", "\n", logits)

pred_flat: 
 [0 0 0 1 1 0 0 0 0 1 0 1 0 1 0]


logits 
 [[ 0.5025339  -1.5477275 ]
 [-0.08648707 -0.49553707]
 [ 0.2107775  -1.1410849 ]
 [-0.42333165  0.15351833]
 [-0.44360977  0.21931951]
 [ 0.4910407  -1.6862265 ]
 [-0.16157067 -0.36286414]
 [ 0.372751   -1.2294025 ]
 [ 0.08265491 -0.8174026 ]
 [-0.37982374  0.03051349]
 [ 0.0771727  -0.89037675]
 [-0.50736415  0.42355138]
 [ 0.20730768 -1.0136122 ]
 [-0.28148508 -0.00660457]
 [ 0.39653978 -1.3593024 ]]


### printig some variabels to look into 

In [None]:
train_loss


In [None]:
train_acc 

In [None]:
train_prec

In [None]:

train_rec

In [None]:

train_f1

In [None]:
val_accuracy

In [None]:
val_loss

In [None]:
val_acc

In [None]:
val_prec

In [None]:
val_rec

In [None]:
val_f1

In [None]:
test_accuracy

In [None]:
test_loss

In [None]:
test_acc

In [None]:
test_prec

In [None]:
test_rec

In [None]:
test_f1