In [18]:
import pandas as pd
import numpy as np
import spacy 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef
from transformers import BertForSequenceClassification, AutoTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import random
import os
import torch.nn.functional as F
import torch
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
import transformers
from tqdm import tqdm, trange
from utils import normalizeTweet, split_into_sentences, bio_tagging, create_training_data



#data = pd.read_excel("/home/adrian/workspace/causality/Causal-associations-diabetes-twitter/data/Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")
# data = pd.read_excel("/home/adrian/Downloads/Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")
data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")

print("Total count:", data.shape[0])
data = data[data["Causal association"].notnull()]
data = data[["full_text", "Intent", "Cause", "Effect", "Causal association"]]
print("Labeled count:", data.shape[0])

data.head()

  data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")


Total count: 5434
Labeled count: 5000


Unnamed: 0,full_text,Intent,Cause,Effect,Causal association
0,"tonight , I learned my older girl will back he...",,,,0.0
1,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0.0
2,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,mS,,,0.0
3,USER Cheers ! Have one for this diabetic too !,mS,,,0.0
4,USER Additionally the medicines are being char...,,medicines are being charged at MRP,costing much higher,1.0


## Add BIO tags

In [19]:
data["tokenized"] = data["full_text"].map(lambda tweet: normalizeTweet(tweet).split(" "))
data["bio_tags"] = data.apply(lambda row: bio_tagging(row["full_text"],row["Cause"], row["Effect"]), axis=1)
data.head(n=20)

Unnamed: 0,full_text,Intent,Cause,Effect,Causal association,tokenized,bio_tags
0,"tonight , I learned my older girl will back he...",,,,0.0,"[tonight, ,, I, learned, my, older, girl, will...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
1,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0.0,"[USER, USER, I, knew, diabetes, and, fibromyal...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
2,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,mS,,,0.0,"[:down_arrow:, :down_arrow:, :down_arrow:, THI...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
3,USER Cheers ! Have one for this diabetic too !,mS,,,0.0,"[USER, Cheers, !, Have, one, for, this, diabet...","[O, O, O, O, O, O, O, O, O, O]"
4,USER Additionally the medicines are being char...,,medicines are being charged at MRP,costing much higher,1.0,"[USER, Additionally, the, medicines, are, bein...","[O, O, O, B-C, I-C, I-C, I-C, I-C, I-C, O, O, ..."
5,USER USER We have those days Esp . if it inter...,msS,diabetic,hate,1.0,"[USER, USER, We, have, those, days, Esp, ., if...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
6,Why all of a sudden are people hungry and vuln...,q,,,0.0,"[Why, all, of, a, sudden, are, people, hungry,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
7,"i got lime for my glucose test , wasn't that b...",,glucose test,nauseous,1.0,"[i, got, lime, for, my, glucose, test, ,, was,...","[O, O, O, O, O, B-C, I-C, O, O, O, O, O, O, O,..."
8,This stickur of Unkel Funny iz ware i am shave...,,,,0.0,"[This, stickur, of, Unkel, Funny, iz, ware, i,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
9,For the second time in my life I gave myself i...,mS,,,0.0,"[For, the, second, time, in, my, life, I, gave...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."


## Split all tweets into sentences => new dataframe with more rows

In [20]:
def get_start_end_index_of_sentence_in_tweet(tweet, sentence):
    """ 
    The sentence tokens are included in the tweet tokens.
    Return the start end end indices of the sentence tokens in the tweet tokens

    """

    sentence_start_word = sentence[0]
    start_indices = [i for i, x in enumerate(tweet) if x == sentence_start_word] # find all indices of the start word of the sentence 
    try:
        for start_index in start_indices:
            isTrueStartIndex = all([tweet[start_index+i] == sentence[i] for i in range(len(sentence))])
            #print("start_index:", start_index, "isTrueStartIndex:", isTrueStartIndex)
            if isTrueStartIndex:
                return start_index, start_index + len(sentence) 
    except:
        print("ERROR: StartIndex should have been found for sentence:")
        print("tweet:")
        print(tweet)
        print("sentence:")
        print(sentence)
    return -1, -2 # should not be returned


def split_tweets_to_sentences(data):
    """ 
        Splits tweets into sentences and associates the appropriate intent, causes, effects and causal association
        to each sentence.
        
        Parameters:
        - min_words_in_sentences: Minimal number of words in a sentence such that the sentence is kept. 
                                  Assumption: A sentence with too few words does not have enough information
                              
                              
                              
        Ex.:
        full_text                              | Intent | Cause | Effect | Causal association | ...
        --------------------------------------------------------------------------------------------
        what? type 1 causes insulin dependence | q;msS  | type 1|insulin dependence | 1       | ...  
        
        New dataframe returned: 
        full_text                              | Intent | Cause | Effect | Causal association | ...
        --------------------------------------------------------------------------------------------
        what?                                  |   q    |       |        |       0            | ...
        type 1 causes insulin dependence       |        | type 1| insulin dependence | 1       | ...  
    """

    newDF = pd.DataFrame(columns=["sentence", "Intent", "Cause", "Effect", "Causal association", "tokenized", "bio_tags"])
    
    for i,row in data.iterrows():
        causes = row["Cause"]
        effects = row["Effect"]
        sentences = split_into_sentences(normalizeTweet(row["full_text"]))

        # single sentence in tweet
        if len(sentences) == 1:
            singleSentenceIntent = ""
            if isinstance(row["Intent"], str):
                if len(row["Intent"].split(";")) > 1:
                    singleSentenceIntent = row["Intent"].strip().replace(";msS", "").replace("msS;", "").replace(";mS", "").replace("mS;", "")
                else:
                    if row["Intent"] == "mS" or row["Intent"] == "msS":
                        singleSentenceIntent = ""
                    else:
                        singleSentenceIntent = row["Intent"].strip()
                    
            newDF=newDF.append(pd.Series({"sentence": sentences[0] # only one sentence
                         , "Intent": singleSentenceIntent
                         , "Cause" : row["Cause"]
                         , "Effect": row["Effect"]
                         , "Causal association" : row["Causal association"]
                         , "tokenized": row["tokenized"]
                         , "bio_tags": row["bio_tags"]}), ignore_index=True)
        
        # tweet has several sentences
        else: 
            intents = str(row["Intent"]).strip().split(";")
            
            for sentence in sentences:
                sent_tokenized = sentence.split(" ")
                
                causeInSentence = np.nan if not isinstance(causes, str) or not any([cause in sentence for cause in causes.split(";")]) else ";".join([cause for cause in causes.split(";") if cause in sentence])
                effectInSentence = np.nan if not isinstance(effects, str) or not any([effect in sentence for effect in effects.split(";")]) else ";".join([effect for effect in effects.split(";") if effect in sentence])
                causalAssociationInSentence = 1 if isinstance(causeInSentence, str) and isinstance(effectInSentence, str) else 0
                
                startIndex, endIndex = get_start_end_index_of_sentence_in_tweet(row["tokenized"], sent_tokenized)
                sentence_tokenized = row["tokenized"][startIndex:endIndex]
                sentence_bio_tags = row["bio_tags"][startIndex:endIndex]
                
                if "q" in intents and sentence[-1] == "?": # if current sentence is question
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": "q", "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)                    
                elif "joke" in intents: # all sentences with "joke" in tweet keep the intent "joke"
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": "joke", "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)   
                elif "neg" in intents: # all sentences with "neg" in tweet keep intent "neg"
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": "neg", "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)               
                elif isinstance(causeInSentence, str) and isinstance(effectInSentence, str): # cause effect sentence
                    causalIntent = ""
                    if len(causeInSentence.split(";")) > 1:
                        causalIntent = "mC"
                        if len(effectInSentence.split(";")) > 1:
                            causalIntent = "mC;mE"
                    elif len(effectInSentence.split(";")) > 1:
                        causalIntent = "mE"
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": causalIntent, "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)                                  
                else:
                    nonCausalIntent = ""
                    if isinstance(causeInSentence, str): # only cause is given
                        if len(causeInSentence.split(";")) > 1:
                            nonCausalIntent = "mC"
                    elif isinstance(effectInSentence, str): # only effect is given
                        if len(effectInSentence.split(";")) > 1:
                            nonCausalIntent = "mE"
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": nonCausalIntent, "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)

    return newDF
       
# sample: has one example for each possible "Intent" value
#allIntents = data["Intent"].value_counts().keys().tolist()
#sample = data[data["Intent"] == "mS"][0:1]
#for intent in allIntents:
#    sample = sample.append(data[data["Intent"] == intent][1:2])
#print(sample.shape)

#i = 19
#test = sample[i:i+1]
#dataSentences = split_tweets_to_sentences(test)
#dataSentences.head(30)
#test.head()

print("N tweets:", data.shape[0])
dataSentences = split_tweets_to_sentences(data)
print("N sentences:", dataSentences.shape[0])
dataSentences.head()

N tweets: 5000
N sentences: 11784


Unnamed: 0,sentence,Intent,Cause,Effect,Causal association,tokenized,bio_tags
0,"tonight , I learned my older girl will back he...",,,,0,"[tonight, ,, I, learned, my, older, girl, will...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]"
1,Fiercely .,,,,0,"[Fiercely, .]","[O, O]"
2,#impressive #bigsister #type1 #type1times2,,,,0,"[#impressive, #bigsister, #type1, #type1times2]","[O, O, O, O]"
3,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0,"[USER, USER, I, knew, diabetes, and, fibromyal...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]"
4,:face_with_rolling_eyes:,joke,,,0,[:face_with_rolling_eyes:],[O]


### Filter out negation, jokes, questions and sentences with a minimal token length of 3

In [21]:
print("N sentences before filtering: ", dataSentences.shape[0])
dataSentFiltered = dataSentences[~dataSentences["Intent"].str.contains("neg|joke|q")] # remove sentences with joke, q, neg
dataSentFiltered = dataSentFiltered[dataSentFiltered["tokenized"].map(len) >= 3] # only keep sentences with at least 3 words
print("N sentences after filtering: ", dataSentFiltered.shape[0])
dataSentFiltered.head()


N sentences before filtering:  11784
N sentences after filtering:  8835


Unnamed: 0,sentence,Intent,Cause,Effect,Causal association,tokenized,bio_tags
0,"tonight , I learned my older girl will back he...",,,,0,"[tonight, ,, I, learned, my, older, girl, will...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]"
2,#impressive #bigsister #type1 #type1times2,,,,0,"[#impressive, #bigsister, #type1, #type1times2]","[O, O, O, O]"
5,:down_arrow: :down_arrow: :down_arrow: THIS :d...,,,,0,"[:down_arrow:, :down_arrow:, :down_arrow:, THI...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O]"
6,I 'm a trans woman .,,,,0,"[I, 'm, a, trans, woman, .]","[O, O, O, O, O, O]"
7,"Both of us could use a world where "" brave and...",,,,0,"[Both, of, us, could, use, a, world, where, "",...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."


In [22]:
dataSentFiltered["Intent"].value_counts()

         8705
mE         72
mC         47
mC;mE      10
mE;mC       1
Name: Intent, dtype: int64

### Only work on cause-effect tweets

In [23]:
dataSentFiltered["Causal association"].value_counts()

0.0    7799
1.0    1036
Name: Causal association, dtype: int64

### only take sentences with cause and effect


In [24]:
trainingData = dataSentFiltered[dataSentFiltered["Causal association"] == 1]
trainingData.shape

(1036, 7)

### Create training, validation, test sets

In [25]:
trainingDataSample = trainingData#.sample(n=200)   # VIVEK: DELETE TAKING SAMPLE. THIS WAS ONLY FOR TESTING
train = trainingDataSample.sample(frac=0.8, random_state=0)
test = trainingDataSample.drop(train.index)
validate = train.sample(frac=0.2, random_state=0)
train = train.drop(validate.index)
print("Train:", train.shape)
print("Validate:", validate.shape)
print("Test:", test.shape)

Train: (663, 7)
Validate: (166, 7)
Test: (207, 7)


In [26]:

# Transform labels + encodings into Pytorch DataSet object (including __len__, __getitem__)
class TweetDataSet(torch.utils.data.Dataset):
    def __init__(self, text, labels, bio_tags, tokenizer):
        self.text = text
        self.labels = labels
        self.tokenizer = tokenizer
        self.bio_tags = bio_tags
        self.tag2id = {label: idx for idx, label in enumerate(["O", "B-C", "I-C", "B-E", "I-E"])}
        self.tag2id[-100] = -100
        self.id2tag = {id:tag for tag,id in self.tag2id.items()}

    def __getitem__(self, idx):
        inputs = self.tokenizer(self.text, padding=True, truncation=True, return_token_type_ids=True)
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]
        bio_tags_extended = self.extend_tags(self.text[idx], self.bio_tags[idx], ids[idx])
        assert(len(ids[idx]) == len(bio_tags_extended), "token ids and BIO tags lengths do not match!")
        assert(len(ids[idx]) == len(bio_tags_extended), "token ids and BIO tags lengths do not match!")
        return {
                "input_ids" : torch.tensor(ids[idx], dtype=torch.long)
              , "attention_mask" : torch.tensor(mask[idx], dtype=torch.long)
              , "token_type_ids" : torch.tensor(token_type_ids[idx], dtype=torch.long)
              , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
              , "bio_tags" : torch.tensor(list(map(lambda bioTags: self.tag2id[bioTags], bio_tags_extended))
, dtype=torch.long)
        }

    def __len__(self):
        return len(self.labels)

    
    def extend_tags(self, tokens_old, tags_old, ids_tokenized_padded):
        """ 
            Each token has a BIO tag label. 
            However BERT's tokenization splits tokens into subwords. How to label those subwords?
            
            Option 1:
            ---------
            
            add the same label to each subword than the first subword. Only replace "B" by "I"
            Ex. 
            #lowbloodsugar => '#low@@', 'blood@@', 'sugar@@'
               "B-C"       =>   "B-C" ,   "I-C"  ,   "I-C"
            
            Option 2 (implemented):      
            ---------
            
            From : https://huggingface.co/transformers/custom_datasets.html#token-classification-with-w-nut-emerging-entities
            A common obstacle with using pre-trained models for token-level classification: many of the tokens in
            the W-NUT corpus are not in DistilBert’s vocabulary. Bert and many models like it use a method called 
            WordPiece Tokenization, meaning that single words are split into multiple tokens such that each token
            is likely to be in the vocabulary. For example, DistilBert’s tokenizer would split the Twitter 
            handle @huggingface into the tokens ['@', 'hugging', '##face']. This is a problem for us because we 
            have exactly one tag per token. If the tokenizer splits a token into multiple sub-tokens, then we will
            end up with a mismatch between our tokens and our labels.

            One way to handle this is to only train on the tag labels for the first subtoken of a split token. 
            We can do this in 🤗 Transformers by setting the labels we wish to ignore to -100. 
            In the example above, if the label for @HuggingFace is 3 (indexing B-corporation), we would set 
            the labels of ['@', 'hugging', '##face'] to [3, -100, -100].
        """
        tags = [-100] # add for start token <CLS>
        for token_old, tag in zip(tokens_old.split(" "), tags_old):
#            print(F"\ntoken_old: {token_old};    tag: {tag}")
            for i, sub_token in enumerate(self.tokenizer.tokenize(token_old)):
                if (i == 0):
                    tags.append(tag)
                else: 
                    tags.append(-100)
           
        tags.append(-100) # 0 for end of sentence token
    
        # append -100 for all padded elements
        padded_elements = ids_tokenized_padded.count(1) # id 1 is <PAD> ; Alternative: where attention_mask == 0 add -100
        tags.extend([-100]*padded_elements)
        
        return tags
        
        
    
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")

train_dataset = TweetDataSet(train["sentence"].values.tolist()
                           , train["Causal association"].values.tolist()
                           , train["bio_tags"].values.tolist()
                           , tokenizer)
val_dataset = TweetDataSet(validate["sentence"].values.tolist()
                           , validate["Causal association"].values.tolist()
                           , validate["bio_tags"].values.tolist()
                           , tokenizer)
test_dataset = TweetDataSet(test["sentence"].values.tolist()
                           , test["Causal association"].values.tolist()
                           , test["bio_tags"].values.tolist()
                           , tokenizer)
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))

# put data to batches
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=True)


  assert(len(ids[idx]) == len(bio_tags_extended), "token ids and BIO tags lengths do not match!")
  assert(len(ids[idx]) == len(bio_tags_extended), "token ids and BIO tags lengths do not match!")
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


663
166
207


In [27]:
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
from transformers import AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(pred, labels):
    """
        Dataset is unbalanced -> measure weighted metrics
        Calculate metrics for each label, and find their average wieghted by support (Number of true instances for each label)
        This alters 'macro' to account for label imbalance;
        it can result in an F-Score taht is not between precision and recall
    """
    precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='macro') # TODO: check weightin
    acc = accuracy_score(labels, pred)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }



class CausalNER(torch.nn.Module):
    """ Model Bert"""
    def __init__(self):
        super(CausalNER, self).__init__()
        self.num_labels = 5 # B-C, I-C, B-E, I-E, O
        self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
        self.dropout = torch.nn.Dropout(0.3)
        self.linear1 = torch.nn.Linear(768, 256)
        self.linear2 = torch.nn.Linear(256, self.num_labels)
        self.softmax = torch.nn.Softmax(-1)
        
    def forward(self, input_ids, attention_mask, token_type_ids):
#        _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False) # if output 1 is our cls token
        output_seq, _ = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False) # if output 1 is our cls token
        output_2 = self.dropout(output_seq)
        output_3 = self.linear1(output_2)
        output_4 = self.dropout(output_3)
        output_5 = self.linear2(output_4)
        return output_5


### Model parameters

In [28]:
batchsize_train = 16
lr = 1e-3
adam_eps = 1e-8
epochs = 35
num_warmup_steps = 0
num_training_steps = len(train_loader)*epochs

In [29]:
# Store our loss and learning rate for plotting
train_loss_set = []
learning_rate = []


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = CausalNER()
model.to(device)

# fine-tune only the task-specific parameters -> Vivek? 
for param in model.bert.parameters():
    param.requires_grad = False
    


optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
# scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0;
# after a warmup period during which it increases linearly from to the initial lr set in the optimizer
scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) 

loss_fn = CrossEntropyLoss(ignore_index=-100) # ignore subwords/tokens with label -100 


You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.1.attention.self.value.bias', 'roberta.encoder.layer.6.attention.self.query.bias', 'roberta.encoder.layer.10.attention.self.query.bias', 'roberta.encoder.layer.4.attention.self.value.weight', 'roberta.encoder.layer.4.output.dense.bias', 'roberta.encoder.layer.6.output.LayerNorm.bias', 'roberta.encoder.layer.4.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.self.value.weight', 'roberta.encoder.layer.10.output.dense.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.10.attention.self.value.weight', 'roberta.encoder.layer.8.attention.output.LayerNorm.bias', 'roberta.encoder.layer.9.attention.self.value.bias', 'roberta.encoder.layer.11.attention.self.k

### Training

In [30]:
N_bio_tags = 5 # "O", "B-C", "I-C", "B-E", "I-C"
for epoch in trange(1, epochs+1, desc='Epoch'):
    print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")

    
    ############ training eval metrics ######################
    nb_tr_steps = 0 # Tracking variables
    train_loss = []
    train_acc = []
    train_prec = []
    train_rec = []
    train_f1 = []
    
    #########################################################
    
    
    for batch in tqdm(train_loader):
        optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch["token_type_ids"].to(device)
        labels = batch['labels'].to(device)
        bio_tags = batch['bio_tags'].to(device)
        
        ################################################
        model.train() # set model to training mode
        logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass

        ################################################ 
        # similar to the class RobertaForToken classification in transformers: https://github.com/huggingface/transformers/blob/master/src/transformers/models/roberta/modeling_roberta.py
        active_loss = attention_mask.view(-1) == 1  # either based on attention_mask (includes <CLS>, <SEP> token)
        active_logits = logits.view(-1, N_bio_tags)[active_loss] # N_bio_tags=5 
        active_tags = bio_tags.view(-1)[active_loss]
        loss = loss_fn(active_logits, active_tags)             
        print("loss:", loss)       ## TODO VIVEK: check loss function calculation
        loss.backward() # backward pass
        optim.step()    # update parameters and take a steup using the computed gradient
        scheduler.step()# update learning rate scheduler
        train_loss.append(loss.item())
            
            
        ################## Training Performance Measures ##########
        logits = logits.detach().to('cpu').numpy()
        tags_ids = bio_tags.to('cpu').numpy()

        # calculate performance measures only on tokens and not subwords or special tokens
        tags_mask = tags_ids != -100 # only get token labels and not labels from subwords or special tokens
        pred = np.argmax(logits, axis=2)[tags_mask] #.flatten() # convert logits to list of predicted labels
        tags = tags_ids[tags_mask]                      
                
        metrics = compute_metrics(pred, tags)
        train_acc.append(metrics["accuracy"])
        train_prec.append(metrics["precision"])
        train_rec.append(metrics["recall"])
        train_f1.append(metrics["f1"])
                          
        nb_tr_steps += 1
           
    print(F'\n\tTraining Loss: {np.mean(train_loss)}')
    print(F'\n\tTraining acc: {np.mean(train_acc)}')
    print(F'\n\tTraining prec: {np.mean(train_prec)}')
    print(F'\n\tTraining rec: {np.mean(train_rec)}')
    print(F'\n\tTraining f1: {np.mean(train_f1)}')
                          
                          
    # store the current learning rate
    for param_group in optim.param_groups:
        print("\n\tCurrent Learning rate: ", param_group['lr'])
        learning_rate.append(param_group['lr'])
    

    ############# Validation ################
    
    nb_eval_steps = 0 # Tracking variables
    val_accuracy = []
    val_loss = []
    val_acc = []
    val_prec = []
    val_rec = []
    val_f1 = []

    # Evaluate data for one epoch
    for batch in tqdm(validation_loader):
        batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
        v_input_ids, v_input_mask, v_token_type_ids, v_labels, v_bio_tags = batch  # unpack inputs from dataloader
        
        with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
            model.eval() # put model in evaluation mode for validation set
            logits = model(**{"input_ids":v_input_ids, "attention_mask":v_input_mask, "token_type_ids":v_token_type_ids}) # forward pass, calculates logit predictions

        ######################################################
        
        # similar to the class RobertaForToken classification in transformers: https://github.com/huggingface/transformers/blob/master/src/transformers/models/roberta/modeling_roberta.py
        v_active_loss = v_input_mask.view(-1) == 1  # either based on attention_mask (includes <CLS>, <SEP> token)
        v_active_logits = logits.view(-1, N_bio_tags)[v_active_loss] # 5 
        v_active_tags = v_bio_tags.view(-1)[v_active_loss]
        v_loss = loss_fn(v_active_logits, v_active_tags)             
        val_loss.append(v_loss.item())
              
        #########################################################
        logits = logits.detach().to('cpu').numpy()
        tags_ids = v_bio_tags.to('cpu').numpy()

        # calculate performance measures only on tokens and not subwords or special tokens
        tags_mask = tags_ids != -100 # only get token labels and not labels from subwords or special tokens
        pred = np.argmax(logits, axis=2)[tags_mask] #.flatten() # convert logits to list of predicted labels
        tags = tags_ids[tags_mask]#.flatten()        
        
        metrics = compute_metrics(pred, tags)
        val_acc.append(metrics["accuracy"])
        val_prec.append(metrics["precision"])
        val_rec.append(metrics["recall"])
        val_f1.append(metrics["f1"])
                              
        nb_eval_steps += 1
        
    print(F'\n\tValidation Loss: {np.mean(val_loss)}')
    print(F'\n\tValidation acc: {np.mean(val_acc)}')
    print(F'\n\tValidation prec: {np.mean(val_prec)}')
    print(F'\n\tValidation rec: {np.mean(val_rec)}')
    print(F'\n\tValidation f1: {np.mean(val_f1)}')
    


Epoch:   0%|          | 0/35 [00:00<?, ?it/s]
  0%|          | 0/42 [00:00<?, ?it/s][A



  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)

  2%|▏         | 1/42 [00:00<00:34,  1.17it/s][A

loss: tensor(1.6029, device='cuda:0', grad_fn=<NllLossBackward>)


  _warn_prf(average, modifier, msg_start, len(result))

  5%|▍         | 2/42 [00:01<00:32,  1.25it/s][A

loss: tensor(0.8970, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.29it/s][A

loss: tensor(1.2179, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.25it/s][A

loss: tensor(1.4246, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.26it/s][A

loss: tensor(1.2188, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.28it/s][A

loss: tensor(0.8770, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.24it/s][A

loss: tensor(0.7307, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:26,  1.26it/s][A

loss: tensor(0.8001, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:25,  1.27it/s][A

loss: tensor(0.8453, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:24,  1.28it/s][A

loss: tensor(0.9188, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.24it/s][A

loss: tensor(0.8474, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:23,  1.27it/s][A

loss: tensor(0.8642, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:22,  1.28it/s][A

loss: tensor(0.7121, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:21,  1.29it/s][A

loss: tensor(0.7467, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:21,  1.25it/s][A

loss: tensor(0.8227, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.28it/s][A

loss: tensor(0.8933, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.26it/s][A

loss: tensor(0.9118, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:18,  1.29it/s][A

loss: tensor(0.9265, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.26it/s][A

loss: tensor(0.7235, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.27it/s][A

loss: tensor(0.7265, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.29it/s][A

loss: tensor(0.7014, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.30it/s][A

loss: tensor(0.7347, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.26it/s][A

loss: tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:18<00:14,  1.28it/s][A

loss: tensor(0.9066, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:19<00:13,  1.30it/s][A

loss: tensor(0.6404, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.30it/s][A

loss: tensor(0.7184, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.26it/s][A

loss: tensor(0.8123, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:10,  1.28it/s][A

loss: tensor(0.6554, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:22<00:10,  1.29it/s][A

loss: tensor(0.8175, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:23<00:09,  1.30it/s][A

loss: tensor(0.7200, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.27it/s][A

loss: tensor(0.7331, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:07,  1.29it/s][A

loss: tensor(1.0688, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:25<00:06,  1.30it/s][A

loss: tensor(0.7872, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:26<00:06,  1.31it/s][A

loss: tensor(0.6813, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.27it/s][A

loss: tensor(0.6785, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.28it/s][A

loss: tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:28<00:03,  1.29it/s][A

loss: tensor(0.8532, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:29<00:03,  1.30it/s][A

loss: tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:30<00:02,  1.26it/s][A

loss: tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:31<00:01,  1.28it/s][A

loss: tensor(0.8942, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.29it/s][A

loss: tensor(0.7832, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:32<00:00,  1.29it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.39it/s][A

loss: tensor(0.8724, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.8448959759303502

	Training acc: 0.7865254531553008

	Training prec: 0.2256363206516822

	Training rec: 0.20790072047036662

	Training f1: 0.19109207982752072

	Current Learning rate:  0.0009714285714285714



 10%|▉         | 2/21 [00:00<00:02,  7.83it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  7.99it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.36it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.61it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.78it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.89it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.94it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.96it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.97it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.96it/s][A
 57%|█████▋    | 12/21 [00:01<00:00,  9.00it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  9.03it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  9.06it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  9.09it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  9.08it/s][A
 81%|████████  | 17/21 [00:01<00:00,  9.10it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  9.11it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  9.11it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.61it/s][A
Epoch:   3%|▎         | 1/35 [00:34<19


	Validation Loss: 0.7402940165428888

	Validation acc: 0.7956705052885392

	Validation prec: 0.2632740865217438

	Validation rec: 0.22820182538446937

	Validation f1: 0.22005484445002696



  2%|▏         | 1/42 [00:00<00:31,  1.32it/s][A

loss: tensor(0.6007, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:30,  1.30it/s][A

loss: tensor(0.5787, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:29,  1.31it/s][A

loss: tensor(0.7387, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.25it/s][A

loss: tensor(0.8107, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.28it/s][A

loss: tensor(0.6998, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.27it/s][A

loss: tensor(0.6319, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.28it/s][A

loss: tensor(0.7725, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.25it/s][A

loss: tensor(0.7474, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.26it/s][A

loss: tensor(0.6518, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:25,  1.24it/s][A

loss: tensor(0.8183, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.23it/s][A

loss: tensor(0.7709, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:25,  1.19it/s][A

loss: tensor(0.8074, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:24,  1.19it/s][A

loss: tensor(0.6391, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:23,  1.19it/s][A

loss: tensor(0.7762, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.19it/s][A

loss: tensor(0.8039, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:22,  1.16it/s][A

loss: tensor(0.7129, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:21,  1.17it/s][A

loss: tensor(0.7960, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:20,  1.17it/s][A

loss: tensor(0.7973, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:19,  1.17it/s][A

loss: tensor(0.7363, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:19,  1.15it/s][A

loss: tensor(0.6727, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:18,  1.17it/s][A

loss: tensor(0.7713, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:18<00:17,  1.17it/s][A

loss: tensor(0.8049, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:19<00:16,  1.18it/s][A

loss: tensor(0.6856, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:15,  1.16it/s][A

loss: tensor(0.7711, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:14,  1.18it/s][A

loss: tensor(0.5828, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.21it/s][A

loss: tensor(0.6303, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:22<00:12,  1.23it/s][A

loss: tensor(0.6506, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:23<00:11,  1.19it/s][A

loss: tensor(0.8827, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.22it/s][A

loss: tensor(0.6582, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.25it/s][A

loss: tensor(0.7355, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.25it/s][A

loss: tensor(0.6592, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.21it/s][A

loss: tensor(0.6542, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:27<00:07,  1.21it/s][A

loss: tensor(0.7393, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:28<00:06,  1.18it/s][A

loss: tensor(0.8000, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.20it/s][A

loss: tensor(0.5879, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:05,  1.18it/s][A

loss: tensor(0.7820, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.21it/s][A

loss: tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:31<00:03,  1.24it/s][A

loss: tensor(0.7867, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:32<00:02,  1.22it/s][A

loss: tensor(0.8004, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:33<00:01,  1.23it/s][A

loss: tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.25it/s][A

loss: tensor(0.5734, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:34<00:00,  1.23it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.64it/s][A

loss: tensor(0.5350, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.7137429018815359

	Training acc: 0.8061907498172677

	Training prec: 0.3131686926120019

	Training rec: 0.2399690579290518

	Training f1: 0.23851890329955255

	Current Learning rate:  0.0009428571428571429



 10%|▉         | 2/21 [00:00<00:02,  8.62it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.56it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.58it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.57it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.58it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.47it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.52it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.53it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.53it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.56it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.56it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.57it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.56it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.57it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.50it/s][A
 81%|████████  | 17/21 [00:01<00:00,  8.51it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.52it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.54it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.64it/s][A
Epoch:   6%|▌         | 2/35 [01:11<19


	Validation Loss: 0.7333058842590877

	Validation acc: 0.792805851748809

	Validation prec: 0.25743747179140586

	Validation rec: 0.23737857001244228

	Validation f1: 0.23008423013417212



  2%|▏         | 1/42 [00:00<00:35,  1.16it/s][A

loss: tensor(0.9480, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:32,  1.25it/s][A

loss: tensor(0.7067, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.27it/s][A

loss: tensor(0.6036, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:29,  1.27it/s][A

loss: tensor(0.7588, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.21it/s][A

loss: tensor(0.7771, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.24it/s][A

loss: tensor(0.7536, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.27it/s][A

loss: tensor(0.7980, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:26,  1.28it/s][A

loss: tensor(0.8651, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:27,  1.22it/s][A

loss: tensor(0.6902, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.23it/s][A

loss: tensor(0.6572, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.25it/s][A

loss: tensor(0.6094, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:23,  1.26it/s][A

loss: tensor(0.8258, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:24,  1.20it/s][A

loss: tensor(0.6412, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.23it/s][A

loss: tensor(0.6788, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:21,  1.25it/s][A

loss: tensor(0.6765, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.27it/s][A

loss: tensor(0.7452, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.23it/s][A

loss: tensor(0.6601, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.26it/s][A

loss: tensor(0.7614, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.27it/s][A

loss: tensor(0.7905, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.28it/s][A

loss: tensor(0.7130, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:17,  1.23it/s][A

loss: tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.24it/s][A

loss: tensor(0.5773, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.25it/s][A

loss: tensor(0.6367, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.26it/s][A

loss: tensor(0.5562, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.22it/s][A

loss: tensor(0.7810, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.25it/s][A

loss: tensor(0.5743, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.27it/s][A

loss: tensor(0.6402, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:10,  1.29it/s][A

loss: tensor(0.6085, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.24it/s][A

loss: tensor(0.7198, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.26it/s][A

loss: tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.26it/s][A

loss: tensor(0.7081, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.22it/s][A

loss: tensor(0.6567, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.25it/s][A

loss: tensor(0.7728, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.26it/s][A

loss: tensor(0.6716, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.28it/s][A

loss: tensor(0.7211, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.25it/s][A

loss: tensor(0.7856, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.26it/s][A

loss: tensor(0.8899, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.25it/s][A

loss: tensor(0.7836, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.27it/s][A

loss: tensor(0.7242, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.24it/s][A

loss: tensor(0.7548, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.26it/s][A

loss: tensor(0.7063, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.27it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.52it/s][A

loss: tensor(0.5135, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.7072842291423252

	Training acc: 0.8053239712929225

	Training prec: 0.32285086529970713

	Training rec: 0.24438624573607481

	Training f1: 0.24498761844849143

	Current Learning rate:  0.0009142857142857143



 10%|▉         | 2/21 [00:00<00:02,  8.46it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.36it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.40it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.44it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.44it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.46it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.44it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.36it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.38it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.42it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.50it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.54it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.54it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.55it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.58it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.58it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.57it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.56it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.58it/s][A
Epoch:   9%|▊         | 3/35 [01:47<19


	Validation Loss: 0.7158117450418926

	Validation acc: 0.7979219377401628

	Validation prec: 0.2665155571280954

	Validation rec: 0.24958392077949168

	Validation f1: 0.24281108249033712



  2%|▏         | 1/42 [00:00<00:32,  1.27it/s][A

loss: tensor(0.7521, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:34,  1.16it/s][A

loss: tensor(0.7639, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:32,  1.20it/s][A

loss: tensor(0.6901, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.25it/s][A

loss: tensor(0.7991, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:29,  1.26it/s][A

loss: tensor(0.8367, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.22it/s][A

loss: tensor(0.5739, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.25it/s][A

loss: tensor(0.6824, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:26,  1.27it/s][A

loss: tensor(0.6696, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:25,  1.28it/s][A

loss: tensor(0.7176, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.24it/s][A

loss: tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.27it/s][A

loss: tensor(0.6729, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:23,  1.29it/s][A

loss: tensor(0.7502, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:22,  1.30it/s][A

loss: tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.25it/s][A

loss: tensor(0.7165, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:21,  1.27it/s][A

loss: tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.29it/s][A

loss: tensor(0.5938, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.30it/s][A

loss: tensor(0.7334, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.24it/s][A

loss: tensor(0.8151, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.25it/s][A

loss: tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.27it/s][A

loss: tensor(0.6087, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.25it/s][A

loss: tensor(0.6779, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.27it/s][A

loss: tensor(0.5180, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:14,  1.30it/s][A

loss: tensor(0.7799, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:18<00:13,  1.29it/s][A

loss: tensor(0.5486, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:19<00:13,  1.25it/s][A

loss: tensor(0.8862, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.26it/s][A

loss: tensor(0.7430, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.28it/s][A

loss: tensor(0.6951, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:10,  1.29it/s][A

loss: tensor(0.7585, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:22<00:10,  1.24it/s][A

loss: tensor(0.6627, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:23<00:09,  1.26it/s][A

loss: tensor(0.7438, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.27it/s][A

loss: tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:07,  1.27it/s][A

loss: tensor(0.7349, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.23it/s][A

loss: tensor(0.6364, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:26<00:06,  1.26it/s][A

loss: tensor(0.7724, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.27it/s][A

loss: tensor(0.8357, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.28it/s][A

loss: tensor(0.6143, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:04,  1.24it/s][A

loss: tensor(0.6114, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.26it/s][A

loss: tensor(0.7416, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:30<00:02,  1.27it/s][A

loss: tensor(0.6319, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:31<00:01,  1.29it/s][A

loss: tensor(0.7835, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.23it/s][A

loss: tensor(0.6969, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:32<00:00,  1.28it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.90it/s][A

loss: tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6971563597520193

	Training acc: 0.8068172760271489

	Training prec: 0.36276932616634117

	Training rec: 0.25467475471459433

	Training f1: 0.25842199726464044

	Current Learning rate:  0.0008857142857142857



 10%|▉         | 2/21 [00:00<00:02,  8.23it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.36it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.47it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.54it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.56it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.56it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.60it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.64it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.64it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.66it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.64it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.64it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.61it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.53it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  7.80it/s][A
 81%|████████  | 17/21 [00:02<00:00,  7.59it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  7.82it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.08it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.41it/s][A
Epoch:  11%|█▏        | 4/35 [02:22<18


	Validation Loss: 0.7083988870893206

	Validation acc: 0.7959103603618086

	Validation prec: 0.2683713666143195

	Validation rec: 0.2374704992104661

	Validation f1: 0.23150170924262234



  2%|▏         | 1/42 [00:00<00:32,  1.28it/s][A

loss: tensor(0.6742, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:30,  1.29it/s][A

loss: tensor(0.6991, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.23it/s][A

loss: tensor(0.6984, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.26it/s][A

loss: tensor(0.8140, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.26it/s][A

loss: tensor(0.9170, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.27it/s][A

loss: tensor(0.7540, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.22it/s][A

loss: tensor(0.7474, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.22it/s][A

loss: tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.23it/s][A

loss: tensor(0.7234, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.22it/s][A

loss: tensor(0.6952, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:09<00:26,  1.17it/s][A

loss: tensor(0.7503, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.22it/s][A

loss: tensor(0.6901, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.25it/s][A

loss: tensor(0.6722, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.26it/s][A

loss: tensor(0.6852, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.23it/s][A

loss: tensor(0.7265, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:21,  1.23it/s][A

loss: tensor(0.7069, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.25it/s][A

loss: tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.22it/s][A

loss: tensor(0.6890, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.7970, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.25it/s][A

loss: tensor(0.5964, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.24it/s][A

loss: tensor(0.6309, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.21it/s][A

loss: tensor(0.6155, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.22it/s][A

loss: tensor(0.6359, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.23it/s][A

loss: tensor(0.6305, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.25it/s][A

loss: tensor(0.6169, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.22it/s][A

loss: tensor(0.6966, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.24it/s][A

loss: tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.24it/s][A

loss: tensor(0.5844, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.25it/s][A

loss: tensor(0.8174, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.22it/s][A

loss: tensor(0.6884, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.22it/s][A

loss: tensor(0.7325, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:07,  1.25it/s][A

loss: tensor(0.6334, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.27it/s][A

loss: tensor(0.7407, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.23it/s][A

loss: tensor(0.7676, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.24it/s][A

loss: tensor(0.7849, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.24it/s][A

loss: tensor(0.7330, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:04,  1.24it/s][A

loss: tensor(0.7643, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.21it/s][A

loss: tensor(0.6133, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.22it/s][A

loss: tensor(0.6965, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.23it/s][A

loss: tensor(0.7586, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.23it/s][A

loss: tensor(0.6308, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.25it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.28it/s][A

loss: tensor(0.8017, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.7033292338961646

	Training acc: 0.8051409003957081

	Training prec: 0.35516026323754935

	Training rec: 0.2529815183957242

	Training f1: 0.25605720939805776

	Current Learning rate:  0.0008571428571428571



 10%|▉         | 2/21 [00:00<00:02,  8.55it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.41it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.52it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.64it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.64it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.65it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.65it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.67it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.72it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.79it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.34it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.40it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.14it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.06it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.25it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.50it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.65it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.73it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.64it/s][A
Epoch:  14%|█▍        | 5/35 [02:58<17


	Validation Loss: 0.6814313545113518

	Validation acc: 0.8037114592605489

	Validation prec: 0.29697821519885936

	Validation rec: 0.25574671953851247

	Validation f1: 0.25229701845846114



  2%|▏         | 1/42 [00:00<00:31,  1.29it/s][A

loss: tensor(0.5894, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.28it/s][A

loss: tensor(0.6589, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.26it/s][A

loss: tensor(0.6675, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:32,  1.19it/s][A

loss: tensor(0.8398, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.21it/s][A

loss: tensor(0.7092, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.22it/s][A

loss: tensor(0.6695, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.23it/s][A

loss: tensor(0.6689, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:28,  1.18it/s][A

loss: tensor(0.6186, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:27,  1.19it/s][A

loss: tensor(0.6234, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.20it/s][A

loss: tensor(0.7897, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:09<00:26,  1.16it/s][A

loss: tensor(0.7428, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:25,  1.18it/s][A

loss: tensor(0.7153, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:24,  1.20it/s][A

loss: tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:23,  1.21it/s][A

loss: tensor(0.7218, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:23,  1.16it/s][A

loss: tensor(0.6093, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:21,  1.19it/s][A

loss: tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:14<00:20,  1.22it/s][A

loss: tensor(0.8012, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.23it/s][A

loss: tensor(0.5713, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:19,  1.20it/s][A

loss: tensor(0.7129, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:18,  1.21it/s][A

loss: tensor(0.9218, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.23it/s][A

loss: tensor(0.6677, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:18<00:16,  1.24it/s][A

loss: tensor(0.5191, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:19<00:15,  1.20it/s][A

loss: tensor(0.7340, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.22it/s][A

loss: tensor(0.7125, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.23it/s][A

loss: tensor(0.6443, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:12,  1.24it/s][A

loss: tensor(0.7122, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:22<00:12,  1.21it/s][A

loss: tensor(0.5644, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:23<00:11,  1.22it/s][A

loss: tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.23it/s][A

loss: tensor(0.8816, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.24it/s][A

loss: tensor(0.7658, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:09,  1.21it/s][A

loss: tensor(0.7786, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.22it/s][A

loss: tensor(0.7400, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:27<00:07,  1.23it/s][A

loss: tensor(0.7477, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.24it/s][A

loss: tensor(0.8018, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.20it/s][A

loss: tensor(0.6949, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.23it/s][A

loss: tensor(0.6475, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:03,  1.25it/s][A

loss: tensor(0.7267, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:31<00:03,  1.27it/s][A

loss: tensor(0.7209, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:32<00:02,  1.23it/s][A

loss: tensor(0.6998, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.25it/s][A

loss: tensor(0.7105, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.26it/s][A

loss: tensor(0.6444, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.24it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.79it/s][A

loss: tensor(0.9174, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.7033340207168034

	Training acc: 0.8044410153382303

	Training prec: 0.36362035763904954

	Training rec: 0.25145064086862895

	Training f1: 0.25550050800309765

	Current Learning rate:  0.0008285714285714286



 10%|▉         | 2/21 [00:00<00:02,  8.56it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.63it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.67it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.70it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.66it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.57it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.60it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.65it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.68it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.66it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.69it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.69it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.65it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.58it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.57it/s][A
 81%|████████  | 17/21 [00:01<00:00,  8.53it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.57it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.59it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  8.57it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.6927050635928199

	Validation acc: 0.7993289271831641

	Validation prec: 0.29238267874450957

	Validation rec: 0.253081896134971

	Validation f1: 0.2499190059674796



  2%|▏         | 1/42 [00:00<00:31,  1.31it/s][A

loss: tensor(0.6714, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:30,  1.31it/s][A

loss: tensor(0.6365, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:29,  1.31it/s][A

loss: tensor(0.7024, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.24it/s][A

loss: tensor(0.6122, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.27it/s][A

loss: tensor(0.6389, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.28it/s][A

loss: tensor(0.6500, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.29it/s][A

loss: tensor(0.8050, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.25it/s][A

loss: tensor(0.7348, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.26it/s][A

loss: tensor(0.7068, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:25,  1.28it/s][A

loss: tensor(0.6772, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.28it/s][A

loss: tensor(0.7369, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.7745, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.26it/s][A

loss: tensor(0.7028, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:21,  1.27it/s][A

loss: tensor(0.6853, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:20,  1.29it/s][A

loss: tensor(0.7547, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.25it/s][A

loss: tensor(0.7781, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.25it/s][A

loss: tensor(0.7113, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:18,  1.27it/s][A

loss: tensor(0.7611, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:14<00:17,  1.28it/s][A

loss: tensor(0.7944, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.24it/s][A

loss: tensor(0.7345, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.26it/s][A

loss: tensor(0.7556, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.27it/s][A

loss: tensor(0.6730, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:14,  1.28it/s][A

loss: tensor(0.6637, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:18<00:14,  1.24it/s][A

loss: tensor(0.7826, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:19<00:13,  1.26it/s][A

loss: tensor(0.5887, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.27it/s][A

loss: tensor(0.5834, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.28it/s][A

loss: tensor(0.5849, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.24it/s][A

loss: tensor(0.5557, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:22<00:10,  1.26it/s][A

loss: tensor(0.6074, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:23<00:09,  1.27it/s][A

loss: tensor(0.6702, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.29it/s][A

loss: tensor(0.8330, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:07,  1.26it/s][A

loss: tensor(0.6592, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.27it/s][A

loss: tensor(0.6516, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:26<00:06,  1.28it/s][A

loss: tensor(0.6592, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.29it/s][A

loss: tensor(0.6880, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.24it/s][A

loss: tensor(0.7395, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.26it/s][A

loss: tensor(0.5726, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:29<00:03,  1.28it/s][A

loss: tensor(0.7483, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:30<00:02,  1.22it/s][A

loss: tensor(0.6904, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:31<00:01,  1.25it/s][A

loss: tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.27it/s][A

loss: tensor(0.7280, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:32<00:00,  1.28it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.76it/s][A

loss: tensor(0.7183, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6915095377536047

	Training acc: 0.8070607868529779

	Training prec: 0.3803230958143026

	Training rec: 0.25220213941682396

	Training f1: 0.25621640718713834

	Current Learning rate:  0.0008



 10%|▉         | 2/21 [00:00<00:02,  8.78it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.71it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.58it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.55it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.55it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.67it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.78it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.82it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.87it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.91it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.97it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.99it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  9.01it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  9.05it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  9.06it/s][A
 81%|████████  | 17/21 [00:01<00:00,  9.10it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  9.10it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  9.10it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.97it/s][A
Epoch:  20%|██        | 7/35 [04:09<16


	Validation Loss: 0.7044902741909027

	Validation acc: 0.7987394030870549

	Validation prec: 0.2847983500284713

	Validation rec: 0.242211354872457

	Validation f1: 0.2388071051123985



  2%|▏         | 1/42 [00:00<00:36,  1.13it/s][A

loss: tensor(0.7869, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:32,  1.24it/s][A

loss: tensor(0.6129, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.27it/s][A

loss: tensor(0.6671, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:29,  1.29it/s][A

loss: tensor(0.7269, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.23it/s][A

loss: tensor(0.7012, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.26it/s][A

loss: tensor(0.6576, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.27it/s][A

loss: tensor(0.7426, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:26,  1.28it/s][A

loss: tensor(0.6878, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.23it/s][A

loss: tensor(0.8019, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:25,  1.25it/s][A

loss: tensor(0.7556, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.28it/s][A

loss: tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:23,  1.29it/s][A

loss: tensor(0.6865, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.25it/s][A

loss: tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:21,  1.28it/s][A

loss: tensor(0.6009, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:20,  1.29it/s][A

loss: tensor(0.5624, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.30it/s][A

loss: tensor(0.7392, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.20it/s][A

loss: tensor(0.6004, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.24it/s][A

loss: tensor(0.5773, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.27it/s][A

loss: tensor(0.5565, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.29it/s][A

loss: tensor(0.7323, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.24it/s][A

loss: tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.25it/s][A

loss: tensor(0.6376, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:14,  1.28it/s][A

loss: tensor(0.8867, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:18<00:14,  1.28it/s][A

loss: tensor(0.6439, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:19<00:13,  1.23it/s][A

loss: tensor(0.7689, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.24it/s][A

loss: tensor(0.5825, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.25it/s][A

loss: tensor(0.7917, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.27it/s][A

loss: tensor(0.6934, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.24it/s][A

loss: tensor(0.7600, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:23<00:09,  1.27it/s][A

loss: tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.28it/s][A

loss: tensor(0.5936, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:07,  1.25it/s][A

loss: tensor(0.7634, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.26it/s][A

loss: tensor(0.6887, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:26<00:06,  1.26it/s][A

loss: tensor(0.5836, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.28it/s][A

loss: tensor(0.9381, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.25it/s][A

loss: tensor(0.8905, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.27it/s][A

loss: tensor(0.6455, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.28it/s][A

loss: tensor(0.6588, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:30<00:02,  1.30it/s][A

loss: tensor(0.5925, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:31<00:01,  1.25it/s][A

loss: tensor(0.7358, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.27it/s][A

loss: tensor(0.7344, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:32<00:00,  1.28it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.30it/s][A

loss: tensor(0.6506, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6918536609127408

	Training acc: 0.807591089962734

	Training prec: 0.3789001654025165

	Training rec: 0.25734863184301265

	Training f1: 0.2626784374412495

	Current Learning rate:  0.0007714285714285715



 10%|▉         | 2/21 [00:00<00:02,  8.30it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.19it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.22it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.22it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.24it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.02it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.08it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.10it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.16it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.20it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.22it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.26it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.28it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.22it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.24it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.21it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.18it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.22it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.28it/s][A
Epoch:  23%|██▎       | 8/35 [04:45<16


	Validation Loss: 0.708239075683412

	Validation acc: 0.7969260477387996

	Validation prec: 0.2679167295392636

	Validation rec: 0.2346417781232953

	Validation f1: 0.22799367340605098



  2%|▏         | 1/42 [00:00<00:30,  1.32it/s][A

loss: tensor(0.6818, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:32,  1.22it/s][A

loss: tensor(0.6269, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.26it/s][A

loss: tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:29,  1.29it/s][A

loss: tensor(0.7282, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:28,  1.29it/s][A

loss: tensor(0.7516, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.24it/s][A

loss: tensor(0.7663, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.27it/s][A

loss: tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:26,  1.29it/s][A

loss: tensor(0.8073, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:25,  1.30it/s][A

loss: tensor(0.6084, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:25,  1.25it/s][A

loss: tensor(0.6393, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.27it/s][A

loss: tensor(0.6197, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:23,  1.27it/s][A

loss: tensor(0.7711, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:22,  1.27it/s][A

loss: tensor(0.7739, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.22it/s][A

loss: tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:21,  1.23it/s][A

loss: tensor(0.8201, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.24it/s][A

loss: tensor(0.6077, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.25it/s][A

loss: tensor(0.7731, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.21it/s][A

loss: tensor(0.7348, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.5512, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.24it/s][A

loss: tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:17,  1.21it/s][A

loss: tensor(0.7202, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.22it/s][A

loss: tensor(0.6584, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.24it/s][A

loss: tensor(0.7019, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.25it/s][A

loss: tensor(0.6286, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:14,  1.21it/s][A

loss: tensor(0.7009, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:13,  1.23it/s][A

loss: tensor(0.5430, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.24it/s][A

loss: tensor(0.7518, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.25it/s][A

loss: tensor(0.7035, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.21it/s][A

loss: tensor(0.7683, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.23it/s][A

loss: tensor(0.7773, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.24it/s][A

loss: tensor(0.7106, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.25it/s][A

loss: tensor(0.7352, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.21it/s][A

loss: tensor(0.7548, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.23it/s][A

loss: tensor(0.7912, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.24it/s][A

loss: tensor(0.5994, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.25it/s][A

loss: tensor(0.8427, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:04,  1.21it/s][A

loss: tensor(0.8182, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.24it/s][A

loss: tensor(0.5163, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.25it/s][A

loss: tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.27it/s][A

loss: tensor(0.7436, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.24it/s][A

loss: tensor(0.6724, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.26it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.57it/s][A

loss: tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6943239796729315

	Training acc: 0.8052686652830298

	Training prec: 0.3850151606081421

	Training rec: 0.2523302674241463

	Training f1: 0.25812278813106326

	Current Learning rate:  0.0007428571428571429



 10%|▉         | 2/21 [00:00<00:02,  8.54it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.53it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.58it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.59it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.58it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.53it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.51it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.52it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.50it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.52it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.53it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.54it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.51it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.50it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.50it/s][A
 81%|████████  | 17/21 [00:01<00:00,  8.49it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.47it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.49it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.60it/s][A
Epoch:  26%|██▌       | 9/35 [05:21<15


	Validation Loss: 0.7064813077449799

	Validation acc: 0.7980953770302516

	Validation prec: 0.3219345138124016

	Validation rec: 0.25112438367697004

	Validation f1: 0.2501519019107469



  2%|▏         | 1/42 [00:00<00:30,  1.33it/s][A

loss: tensor(0.8026, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:30,  1.33it/s][A

loss: tensor(0.6293, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.25it/s][A

loss: tensor(0.6778, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:29,  1.28it/s][A

loss: tensor(0.7835, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:28,  1.29it/s][A

loss: tensor(0.8255, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:27,  1.31it/s][A

loss: tensor(0.6660, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.26it/s][A

loss: tensor(0.6605, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:26,  1.28it/s][A

loss: tensor(0.7729, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:06<00:25,  1.29it/s][A

loss: tensor(0.7779, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:24,  1.30it/s][A

loss: tensor(0.8104, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.25it/s][A

loss: tensor(0.6895, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:23,  1.28it/s][A

loss: tensor(0.6219, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:22,  1.29it/s][A

loss: tensor(0.7710, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:10<00:21,  1.30it/s][A

loss: tensor(0.6977, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:21,  1.26it/s][A

loss: tensor(0.5627, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.28it/s][A

loss: tensor(0.6140, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.29it/s][A

loss: tensor(0.5761, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.26it/s][A

loss: tensor(0.5085, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:14<00:17,  1.28it/s][A

loss: tensor(0.5935, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.29it/s][A

loss: tensor(0.8142, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.30it/s][A

loss: tensor(0.7449, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.26it/s][A

loss: tensor(0.6293, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:17<00:14,  1.28it/s][A

loss: tensor(0.7268, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:18<00:13,  1.29it/s][A

loss: tensor(0.6041, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:19<00:13,  1.30it/s][A

loss: tensor(0.4678, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.26it/s][A

loss: tensor(0.6749, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.28it/s][A

loss: tensor(0.8553, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:21<00:10,  1.30it/s][A

loss: tensor(0.7444, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:22<00:09,  1.31it/s][A

loss: tensor(0.7072, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:23<00:09,  1.26it/s][A

loss: tensor(0.7230, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.28it/s][A

loss: tensor(0.6920, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:24<00:07,  1.30it/s][A

loss: tensor(0.6328, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:25<00:06,  1.31it/s][A

loss: tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:26<00:06,  1.26it/s][A

loss: tensor(0.6577, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.28it/s][A

loss: tensor(0.8838, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.30it/s][A

loss: tensor(0.4999, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:28<00:03,  1.31it/s][A

loss: tensor(0.6025, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:29<00:03,  1.26it/s][A

loss: tensor(0.7743, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:30<00:02,  1.28it/s][A

loss: tensor(0.7235, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:31<00:01,  1.30it/s][A

loss: tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:31<00:00,  1.31it/s][A

loss: tensor(0.7159, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:32<00:00,  1.30it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.03it/s][A

loss: tensor(0.8007, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6902868854148048

	Training acc: 0.8067973814274325

	Training prec: 0.3800131205853778

	Training rec: 0.2568152139527115

	Training f1: 0.26129730192115924

	Current Learning rate:  0.0007142857142857143



 10%|▉         | 2/21 [00:00<00:02,  8.20it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.35it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.43it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.48it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.50it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.53it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.52it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.52it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.51it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.52it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.52it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.54it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.54it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.53it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.54it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.52it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.47it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.51it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.58it/s][A
Epoch:  29%|██▊       | 10/35 [05:55<1


	Validation Loss: 0.688035881235486

	Validation acc: 0.800139661024498

	Validation prec: 0.32429681628582585

	Validation rec: 0.26598250152542313

	Validation f1: 0.2651748429246626



  2%|▏         | 1/42 [00:00<00:30,  1.33it/s][A

loss: tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:30,  1.33it/s][A

loss: tensor(0.6533, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:29,  1.33it/s][A

loss: tensor(0.6290, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.26it/s][A

loss: tensor(0.7473, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:28,  1.29it/s][A

loss: tensor(0.7143, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:27,  1.30it/s][A

loss: tensor(0.7961, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:26,  1.31it/s][A

loss: tensor(0.5829, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.26it/s][A

loss: tensor(0.5694, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:06<00:25,  1.28it/s][A

loss: tensor(0.7252, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:24,  1.29it/s][A

loss: tensor(0.7265, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.26it/s][A

loss: tensor(0.5039, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:23,  1.28it/s][A

loss: tensor(0.7523, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:22,  1.29it/s][A

loss: tensor(0.9136, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:10<00:21,  1.30it/s][A

loss: tensor(0.7141, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:21,  1.26it/s][A

loss: tensor(0.6732, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.28it/s][A

loss: tensor(0.7315, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.30it/s][A

loss: tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:13<00:18,  1.31it/s][A

loss: tensor(0.6302, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:14<00:18,  1.26it/s][A

loss: tensor(0.7866, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.28it/s][A

loss: tensor(0.6687, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.30it/s][A

loss: tensor(0.5583, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.31it/s][A

loss: tensor(0.8370, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:17<00:15,  1.25it/s][A

loss: tensor(0.5931, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:18<00:14,  1.28it/s][A

loss: tensor(0.7898, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:19<00:13,  1.29it/s][A

loss: tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.30it/s][A

loss: tensor(0.8154, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.26it/s][A

loss: tensor(0.7856, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:21<00:11,  1.27it/s][A

loss: tensor(0.6814, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:22<00:10,  1.28it/s][A

loss: tensor(0.5698, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:23<00:09,  1.30it/s][A

loss: tensor(0.7827, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.26it/s][A

loss: tensor(0.7792, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:24<00:07,  1.28it/s][A

loss: tensor(0.7765, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:25<00:06,  1.29it/s][A

loss: tensor(0.5983, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:26<00:06,  1.30it/s][A

loss: tensor(0.6744, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.26it/s][A

loss: tensor(0.6021, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.28it/s][A

loss: tensor(0.7428, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:28<00:03,  1.29it/s][A

loss: tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:29<00:03,  1.30it/s][A

loss: tensor(0.6144, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:30<00:02,  1.25it/s][A

loss: tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:31<00:01,  1.27it/s][A

loss: tensor(0.7246, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:31<00:00,  1.29it/s][A

loss: tensor(0.5757, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:32<00:00,  1.30it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.66it/s][A

loss: tensor(0.7420, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6899226960681734

	Training acc: 0.8068624625673179

	Training prec: 0.38889494460768503

	Training rec: 0.2513960831771761

	Training f1: 0.25720970343781996

	Current Learning rate:  0.0006857142857142857



 10%|▉         | 2/21 [00:00<00:02,  8.56it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.53it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.54it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.55it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.57it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.56it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.52it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.50it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.52it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.53it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.55it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.54it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.54it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.45it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.49it/s][A
 81%|████████  | 17/21 [00:01<00:00,  8.48it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.53it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.56it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  8.58it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.6886679331461588

	Validation acc: 0.7986917750098567

	Validation prec: 0.33172449263391407

	Validation rec: 0.24882718238970386

	Validation f1: 0.24917007978189745



  2%|▏         | 1/42 [00:00<00:31,  1.32it/s][A

loss: tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:30,  1.31it/s][A

loss: tensor(0.6127, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:29,  1.31it/s][A

loss: tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.25it/s][A

loss: tensor(0.6988, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.27it/s][A

loss: tensor(0.5974, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:27,  1.29it/s][A

loss: tensor(0.7060, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:26,  1.31it/s][A

loss: tensor(0.5816, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.26it/s][A

loss: tensor(0.7662, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:25,  1.28it/s][A

loss: tensor(0.7349, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:24,  1.29it/s][A

loss: tensor(0.8514, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:23,  1.30it/s][A

loss: tensor(0.5805, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.5784, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.24it/s][A

loss: tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.23it/s][A

loss: tensor(0.6077, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:22,  1.22it/s][A

loss: tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:22,  1.17it/s][A

loss: tensor(0.5789, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:21,  1.17it/s][A

loss: tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:20,  1.17it/s][A

loss: tensor(0.7638, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:19,  1.20it/s][A

loss: tensor(0.8192, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:18,  1.17it/s][A

loss: tensor(0.6138, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.19it/s][A

loss: tensor(0.8393, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.20it/s][A

loss: tensor(0.7529, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.22it/s][A

loss: tensor(0.6563, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:15,  1.19it/s][A

loss: tensor(0.9385, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.22it/s][A

loss: tensor(0.8076, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:12,  1.26it/s][A

loss: tensor(0.7083, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.27it/s][A

loss: tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.24it/s][A

loss: tensor(0.7552, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.26it/s][A

loss: tensor(0.5932, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.27it/s][A

loss: tensor(0.7703, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.29it/s][A

loss: tensor(0.6086, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.24it/s][A

loss: tensor(0.5790, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.26it/s][A

loss: tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.28it/s][A

loss: tensor(0.9073, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.29it/s][A

loss: tensor(0.7063, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.25it/s][A

loss: tensor(0.7312, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.27it/s][A

loss: tensor(0.6749, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.28it/s][A

loss: tensor(0.7541, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.25it/s][A

loss: tensor(0.5435, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.27it/s][A

loss: tensor(0.9195, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.28it/s][A

loss: tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.27it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.13it/s][A

loss: tensor(0.4565, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6880576816343126

	Training acc: 0.8068942534015049

	Training prec: 0.37546162419448725

	Training rec: 0.2543096367871518

	Training f1: 0.2581424191476217

	Current Learning rate:  0.0006571428571428571



 10%|▉         | 2/21 [00:00<00:02,  8.30it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.34it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.38it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.37it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.40it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.40it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.37it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.36it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.36it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.39it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.33it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.39it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.41it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.41it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.45it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.49it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.49it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.51it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.51it/s][A
Epoch:  34%|███▍      | 12/35 [07:06<1


	Validation Loss: 0.7180732971145993

	Validation acc: 0.7947146953276961

	Validation prec: 0.27958185893618714

	Validation rec: 0.24481306412291906

	Validation f1: 0.2395202333896527



  2%|▏         | 1/42 [00:00<00:34,  1.18it/s][A

loss: tensor(0.6424, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.27it/s][A

loss: tensor(0.5573, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.30it/s][A

loss: tensor(0.5296, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:29,  1.31it/s][A

loss: tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.26it/s][A

loss: tensor(0.5263, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.28it/s][A

loss: tensor(0.6023, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:26,  1.30it/s][A

loss: tensor(0.6853, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:26,  1.31it/s][A

loss: tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.26it/s][A

loss: tensor(0.8062, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:25,  1.28it/s][A

loss: tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.29it/s][A

loss: tensor(0.6939, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:23,  1.30it/s][A

loss: tensor(0.7254, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:22,  1.26it/s][A

loss: tensor(0.8406, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:10<00:21,  1.28it/s][A

loss: tensor(0.6577, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:20,  1.30it/s][A

loss: tensor(0.8618, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:19,  1.31it/s][A

loss: tensor(0.6667, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.26it/s][A

loss: tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:18,  1.28it/s][A

loss: tensor(0.7750, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:14<00:17,  1.29it/s][A

loss: tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:16,  1.30it/s][A

loss: tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.26it/s][A

loss: tensor(0.5234, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.28it/s][A

loss: tensor(0.7178, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:17<00:14,  1.29it/s][A

loss: tensor(0.7008, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:18<00:13,  1.30it/s][A

loss: tensor(0.7916, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:19<00:13,  1.26it/s][A

loss: tensor(0.8622, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.28it/s][A

loss: tensor(0.8483, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.29it/s][A

loss: tensor(0.7258, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:21<00:10,  1.30it/s][A

loss: tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:22<00:10,  1.26it/s][A

loss: tensor(0.7041, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:23<00:09,  1.28it/s][A

loss: tensor(0.7168, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.29it/s][A

loss: tensor(0.7024, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:24<00:07,  1.27it/s][A

loss: tensor(0.7855, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:25<00:07,  1.28it/s][A

loss: tensor(0.5907, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:26<00:06,  1.29it/s][A

loss: tensor(0.5941, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.30it/s][A

loss: tensor(0.5441, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.26it/s][A

loss: tensor(0.6300, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:28<00:03,  1.28it/s][A

loss: tensor(0.5977, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:29<00:03,  1.30it/s][A

loss: tensor(0.8096, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:30<00:02,  1.31it/s][A

loss: tensor(0.7276, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:31<00:01,  1.26it/s][A

loss: tensor(0.5490, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:31<00:00,  1.29it/s][A

loss: tensor(0.6588, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:32<00:00,  1.30it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.64it/s][A

loss: tensor(0.7426, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6791492530277797

	Training acc: 0.8067843037078966

	Training prec: 0.4108347356376429

	Training rec: 0.256485985425642

	Training f1: 0.2637240874056755

	Current Learning rate:  0.0006285714285714285



 10%|▉         | 2/21 [00:00<00:02,  8.57it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.55it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.53it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.56it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.56it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.51it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.52it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.52it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.53it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.55it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.58it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.58it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.59it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.55it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.52it/s][A
 81%|████████  | 17/21 [00:01<00:00,  8.52it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.51it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.55it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.63it/s][A
Epoch:  37%|███▋      | 13/35 [07:41<1


	Validation Loss: 0.6862607442197346

	Validation acc: 0.8012987571376593

	Validation prec: 0.31324769010225884

	Validation rec: 0.26776898624767936

	Validation f1: 0.26538659305851975



  2%|▏         | 1/42 [00:00<00:30,  1.33it/s][A

loss: tensor(0.6957, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:32,  1.23it/s][A

loss: tensor(0.6574, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.27it/s][A

loss: tensor(0.6937, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:29,  1.29it/s][A

loss: tensor(0.5477, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:28,  1.30it/s][A

loss: tensor(0.6861, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.25it/s][A

loss: tensor(0.6920, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.27it/s][A

loss: tensor(0.6800, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:26,  1.29it/s][A

loss: tensor(0.6704, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:25,  1.30it/s][A

loss: tensor(0.8416, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:25,  1.26it/s][A

loss: tensor(0.7956, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.28it/s][A

loss: tensor(0.7592, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:23,  1.29it/s][A

loss: tensor(0.8463, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:22,  1.30it/s][A

loss: tensor(0.8149, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:10<00:22,  1.26it/s][A

loss: tensor(0.7735, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:21,  1.28it/s][A

loss: tensor(0.6673, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.29it/s][A

loss: tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.30it/s][A

loss: tensor(0.6760, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.26it/s][A

loss: tensor(0.7010, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:14<00:17,  1.28it/s][A

loss: tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.29it/s][A

loss: tensor(0.5612, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.26it/s][A

loss: tensor(0.5019, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.28it/s][A

loss: tensor(0.7038, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:17<00:14,  1.29it/s][A

loss: tensor(0.7196, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:18<00:13,  1.30it/s][A

loss: tensor(0.8546, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:19<00:13,  1.26it/s][A

loss: tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.28it/s][A

loss: tensor(0.6578, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.30it/s][A

loss: tensor(0.5772, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:21<00:10,  1.31it/s][A

loss: tensor(0.6439, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:22<00:10,  1.26it/s][A

loss: tensor(0.7316, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:23<00:09,  1.28it/s][A

loss: tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.30it/s][A

loss: tensor(0.5506, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:24<00:07,  1.29it/s][A

loss: tensor(0.7266, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:25<00:07,  1.22it/s][A

loss: tensor(0.7961, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:26<00:06,  1.24it/s][A

loss: tensor(0.5957, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.26it/s][A

loss: tensor(0.6828, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.28it/s][A

loss: tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:04,  1.22it/s][A

loss: tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:29<00:03,  1.23it/s][A

loss: tensor(0.6035, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:30<00:02,  1.26it/s][A

loss: tensor(0.7302, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:31<00:01,  1.28it/s][A

loss: tensor(0.7186, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.23it/s][A

loss: tensor(0.6594, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:32<00:00,  1.29it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.50it/s][A

loss: tensor(0.8068, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6872847037655967

	Training acc: 0.8055587718459006

	Training prec: 0.40064854655178084

	Training rec: 0.2522685078961857

	Training f1: 0.2575016345718651

	Current Learning rate:  0.0006



 10%|▉         | 2/21 [00:00<00:02,  7.73it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  7.95it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.36it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.57it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.71it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.81it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.87it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.93it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.97it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.99it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.93it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.95it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.99it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  9.01it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  9.03it/s][A
 81%|████████  | 17/21 [00:01<00:00,  9.02it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  9.04it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  9.06it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.85it/s][A
Epoch:  40%|████      | 14/35 [08:16<1


	Validation Loss: 0.678309977054596

	Validation acc: 0.802154099668176

	Validation prec: 0.3502366163755425

	Validation rec: 0.26879408340585137

	Validation f1: 0.27270314420941205



  2%|▏         | 1/42 [00:00<00:33,  1.24it/s][A

loss: tensor(0.7235, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:32,  1.25it/s][A

loss: tensor(0.7135, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:32,  1.19it/s][A

loss: tensor(0.6060, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:31,  1.20it/s][A

loss: tensor(0.6658, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.21it/s][A

loss: tensor(0.6600, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.22it/s][A

loss: tensor(0.8505, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:29,  1.18it/s][A

loss: tensor(0.5090, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.22it/s][A

loss: tensor(0.7886, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.24it/s][A

loss: tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.24it/s][A

loss: tensor(0.4953, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:09<00:25,  1.21it/s][A

loss: tensor(0.7850, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.23it/s][A

loss: tensor(0.5459, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.25it/s][A

loss: tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:23,  1.21it/s][A

loss: tensor(0.7063, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.20it/s][A

loss: tensor(0.6476, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:21,  1.24it/s][A

loss: tensor(0.7394, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.24it/s][A

loss: tensor(0.7516, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.22it/s][A

loss: tensor(0.8196, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.5810, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.24it/s][A

loss: tensor(0.7137, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:16,  1.26it/s][A

loss: tensor(0.6860, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.23it/s][A

loss: tensor(0.8138, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.23it/s][A

loss: tensor(0.6941, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.24it/s][A

loss: tensor(0.7019, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:14,  1.18it/s][A

loss: tensor(0.5258, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.16it/s][A

loss: tensor(0.5687, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:22<00:12,  1.18it/s][A

loss: tensor(0.6625, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:23<00:11,  1.18it/s][A

loss: tensor(0.6039, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.19it/s][A

loss: tensor(0.8701, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:10,  1.17it/s][A

loss: tensor(0.7416, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:09,  1.19it/s][A

loss: tensor(0.6649, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.20it/s][A

loss: tensor(0.7693, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:27<00:07,  1.21it/s][A

loss: tensor(0.7500, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:28<00:06,  1.18it/s][A

loss: tensor(0.7179, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.19it/s][A

loss: tensor(0.6189, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.21it/s][A

loss: tensor(0.5459, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.21it/s][A

loss: tensor(0.8583, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:31<00:03,  1.17it/s][A

loss: tensor(0.7298, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:32<00:02,  1.19it/s][A

loss: tensor(0.6884, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:33<00:01,  1.21it/s][A

loss: tensor(0.6861, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.18it/s][A

loss: tensor(0.7682, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:34<00:00,  1.22it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.73it/s][A

loss: tensor(0.5178, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6853071827264059

	Training acc: 0.8066558954379481

	Training prec: 0.38148733066770124

	Training rec: 0.2533247625750314

	Training f1: 0.25954410363932706

	Current Learning rate:  0.0005714285714285714



 10%|▉         | 2/21 [00:00<00:02,  8.93it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.46it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.41it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.70it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.85it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.92it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  9.00it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  9.02it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  9.04it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  9.01it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.60it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.63it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.65it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.60it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.59it/s][A
 81%|████████  | 17/21 [00:01<00:00,  8.59it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.21it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.18it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.68it/s][A
Epoch:  43%|████▎     | 15/35 [08:52<1


	Validation Loss: 0.6963157994406564

	Validation acc: 0.8016910106844173

	Validation prec: 0.29456416875010527

	Validation rec: 0.2543603063816634

	Validation f1: 0.25141365989607084



  2%|▏         | 1/42 [00:00<00:33,  1.24it/s][A

loss: tensor(0.7459, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:32,  1.22it/s][A

loss: tensor(0.6403, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.24it/s][A

loss: tensor(0.5819, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:31,  1.20it/s][A

loss: tensor(0.7016, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:29,  1.24it/s][A

loss: tensor(0.6276, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.25it/s][A

loss: tensor(0.7607, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.27it/s][A

loss: tensor(0.6127, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.23it/s][A

loss: tensor(0.7779, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.25it/s][A

loss: tensor(0.5903, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.26it/s][A

loss: tensor(0.7076, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.23it/s][A

loss: tensor(0.7104, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.25it/s][A

loss: tensor(0.5814, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.26it/s][A

loss: tensor(0.6607, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.22it/s][A

loss: tensor(0.6448, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.24it/s][A

loss: tensor(0.8474, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.24it/s][A

loss: tensor(0.5779, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.25it/s][A

loss: tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.22it/s][A

loss: tensor(0.6376, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.23it/s][A

loss: tensor(0.6917, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.25it/s][A

loss: tensor(0.6106, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.25it/s][A

loss: tensor(0.6681, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.22it/s][A

loss: tensor(0.8177, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.20it/s][A

loss: tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.23it/s][A

loss: tensor(0.7165, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.25it/s][A

loss: tensor(0.7341, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.21it/s][A

loss: tensor(0.7150, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.22it/s][A

loss: tensor(0.6691, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.23it/s][A

loss: tensor(0.8289, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.24it/s][A

loss: tensor(0.7198, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:09,  1.21it/s][A

loss: tensor(0.6853, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.23it/s][A

loss: tensor(0.7982, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.24it/s][A

loss: tensor(0.5700, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.24it/s][A

loss: tensor(0.7866, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.20it/s][A

loss: tensor(0.8378, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.21it/s][A

loss: tensor(0.7019, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.22it/s][A

loss: tensor(0.7834, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.24it/s][A

loss: tensor(0.6131, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.20it/s][A

loss: tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.22it/s][A

loss: tensor(0.6603, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.23it/s][A

loss: tensor(0.5380, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.25it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.53it/s][A

loss: tensor(0.4605, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6815347153516043

	Training acc: 0.8088248587597962

	Training prec: 0.4089300280255331

	Training rec: 0.2582082214050771

	Training f1: 0.26563594914229877

	Current Learning rate:  0.0005428571428571428



 10%|▉         | 2/21 [00:00<00:02,  8.32it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.28it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.30it/s][A
 24%|██▍       | 5/21 [00:00<00:02,  7.81it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.02it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.09it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.19it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.26it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.30it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  7.78it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  7.97it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.07it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.13it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.16it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.20it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.26it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.00it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  7.11it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  7.45it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.7068780930269332

	Validation acc: 0.7957501256144055

	Validation prec: 0.3069003822197651

	Validation rec: 0.2404840336691005

	Validation f1: 0.23798675479171003



  2%|▏         | 1/42 [00:00<00:32,  1.26it/s][A

loss: tensor(0.4855, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.27it/s][A

loss: tensor(0.8779, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.26it/s][A

loss: tensor(0.7133, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:31,  1.21it/s][A

loss: tensor(0.9111, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.22it/s][A

loss: tensor(0.6728, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.23it/s][A

loss: tensor(0.7104, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.24it/s][A

loss: tensor(0.7115, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:28,  1.21it/s][A

loss: tensor(0.7236, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:27,  1.22it/s][A

loss: tensor(0.7382, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.24it/s][A

loss: tensor(0.8215, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.24it/s][A

loss: tensor(0.6266, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.21it/s][A

loss: tensor(0.6629, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.23it/s][A

loss: tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.24it/s][A

loss: tensor(0.6648, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:21,  1.25it/s][A

loss: tensor(0.6572, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:21,  1.21it/s][A

loss: tensor(0.7080, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.22it/s][A

loss: tensor(0.6100, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.23it/s][A

loss: tensor(0.6865, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.5346, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:18,  1.20it/s][A

loss: tensor(0.5571, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.22it/s][A

loss: tensor(0.6796, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.23it/s][A

loss: tensor(0.6555, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.24it/s][A

loss: tensor(0.6577, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.20it/s][A

loss: tensor(0.6413, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.22it/s][A

loss: tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.23it/s][A

loss: tensor(0.6239, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.24it/s][A

loss: tensor(0.6541, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.20it/s][A

loss: tensor(0.5590, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.22it/s][A

loss: tensor(0.8113, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.22it/s][A

loss: tensor(0.6651, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.23it/s][A

loss: tensor(0.5951, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.20it/s][A

loss: tensor(0.7158, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.21it/s][A

loss: tensor(0.6210, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.24it/s][A

loss: tensor(0.8896, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.24it/s][A

loss: tensor(0.5339, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.21it/s][A

loss: tensor(0.6710, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.22it/s][A

loss: tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:31<00:03,  1.23it/s][A

loss: tensor(0.6805, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.19it/s][A

loss: tensor(0.6913, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.21it/s][A

loss: tensor(0.6179, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.22it/s][A

loss: tensor(0.7850, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.24it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.65it/s][A

loss: tensor(0.8602, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6818139758848009

	Training acc: 0.805789154079861

	Training prec: 0.42254099954915164

	Training rec: 0.25565132992900275

	Training f1: 0.2631937792727769

	Current Learning rate:  0.0005142857142857142



 10%|▉         | 2/21 [00:00<00:02,  8.62it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.67it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.66it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.70it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.11it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.28it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.38it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.48it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.54it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.58it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.08it/s][A
 62%|██████▏   | 13/21 [00:01<00:01,  7.95it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.02it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.03it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.08it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.12it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.16it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.06it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  7.81it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.6793123767489478

	Validation acc: 0.8003939207825855

	Validation prec: 0.34615413786308236

	Validation rec: 0.25758112529754124

	Validation f1: 0.25989612207010593



  2%|▏         | 1/42 [00:00<00:36,  1.12it/s][A

loss: tensor(0.6785, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:33,  1.19it/s][A

loss: tensor(0.5671, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.22it/s][A

loss: tensor(0.6575, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.25it/s][A

loss: tensor(0.7855, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.20it/s][A

loss: tensor(0.6328, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.22it/s][A

loss: tensor(0.6469, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:29,  1.20it/s][A

loss: tensor(0.6240, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.23it/s][A

loss: tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:27,  1.20it/s][A

loss: tensor(0.6180, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.23it/s][A

loss: tensor(0.7770, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:09<00:25,  1.24it/s][A

loss: tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.9094, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.21it/s][A

loss: tensor(0.6065, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.22it/s][A

loss: tensor(0.6719, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:21,  1.23it/s][A

loss: tensor(0.7258, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:20,  1.24it/s][A

loss: tensor(0.5868, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.21it/s][A

loss: tensor(0.7051, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.22it/s][A

loss: tensor(0.7360, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.24it/s][A

loss: tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.24it/s][A

loss: tensor(0.6969, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.21it/s][A

loss: tensor(0.6574, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:18<00:16,  1.18it/s][A

loss: tensor(0.7326, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.20it/s][A

loss: tensor(0.5275, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.22it/s][A

loss: tensor(0.5935, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:14,  1.19it/s][A

loss: tensor(0.6015, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.21it/s][A

loss: tensor(0.7308, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:22<00:12,  1.23it/s][A

loss: tensor(0.6536, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.23it/s][A

loss: tensor(0.7316, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.20it/s][A

loss: tensor(0.7830, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.22it/s][A

loss: tensor(0.6558, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.23it/s][A

loss: tensor(0.8314, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.21it/s][A

loss: tensor(0.7458, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:27<00:07,  1.22it/s][A

loss: tensor(0.6688, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.25it/s][A

loss: tensor(0.7252, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.24it/s][A

loss: tensor(0.6521, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.21it/s][A

loss: tensor(0.5498, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.23it/s][A

loss: tensor(0.7143, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:31<00:03,  1.24it/s][A

loss: tensor(0.7160, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.24it/s][A

loss: tensor(0.6431, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.21it/s][A

loss: tensor(0.8470, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.22it/s][A

loss: tensor(0.7270, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.24it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.57it/s][A

loss: tensor(0.6456, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6829644001665569

	Training acc: 0.8085081117135017

	Training prec: 0.41481605745086886

	Training rec: 0.25794868416623673

	Training f1: 0.2665035339070929

	Current Learning rate:  0.0004857142857142857



 10%|▉         | 2/21 [00:00<00:02,  8.38it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.36it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.39it/s][A
 24%|██▍       | 5/21 [00:00<00:02,  7.67it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.00it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.28it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.08it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.19it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.41it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.58it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.75it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.85it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.83it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.40it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.60it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.78it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.83it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.91it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.65it/s][A
Epoch:  51%|█████▏    | 18/35 [10:41<1


	Validation Loss: 0.6977987885475159

	Validation acc: 0.7963016474404567

	Validation prec: 0.30461413096605366

	Validation rec: 0.25122968151668224

	Validation f1: 0.24879993331629632



  2%|▏         | 1/42 [00:00<00:32,  1.26it/s][A

loss: tensor(0.6571, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:33,  1.18it/s][A

loss: tensor(0.7010, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.22it/s][A

loss: tensor(0.6323, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.24it/s][A

loss: tensor(0.7450, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:29,  1.25it/s][A

loss: tensor(0.7099, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.21it/s][A

loss: tensor(0.6513, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.23it/s][A

loss: tensor(0.5947, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.24it/s][A

loss: tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.25it/s][A

loss: tensor(0.8433, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.22it/s][A

loss: tensor(0.7476, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.24it/s][A

loss: tensor(0.8901, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.8933, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.25it/s][A

loss: tensor(0.6162, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:23,  1.22it/s][A

loss: tensor(0.5831, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:21,  1.24it/s][A

loss: tensor(0.5958, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.25it/s][A

loss: tensor(0.6114, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.27it/s][A

loss: tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.23it/s][A

loss: tensor(0.8397, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.25it/s][A

loss: tensor(0.6600, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.26it/s][A

loss: tensor(0.7152, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:17,  1.23it/s][A

loss: tensor(0.7895, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.24it/s][A

loss: tensor(0.5004, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.25it/s][A

loss: tensor(0.7498, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.26it/s][A

loss: tensor(0.6378, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.22it/s][A

loss: tensor(0.5737, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.25it/s][A

loss: tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.26it/s][A

loss: tensor(0.6214, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.27it/s][A

loss: tensor(0.6715, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.22it/s][A

loss: tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.24it/s][A

loss: tensor(0.6903, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.25it/s][A

loss: tensor(0.6270, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:07,  1.25it/s][A

loss: tensor(0.5584, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.22it/s][A

loss: tensor(0.6510, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.23it/s][A

loss: tensor(0.5708, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.25it/s][A

loss: tensor(0.6882, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.25it/s][A

loss: tensor(0.6996, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:04,  1.23it/s][A

loss: tensor(0.7828, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.24it/s][A

loss: tensor(0.6566, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.24it/s][A

loss: tensor(0.7195, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.25it/s][A

loss: tensor(0.8117, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.21it/s][A

loss: tensor(0.6906, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.25it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.43it/s][A

loss: tensor(0.6246, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6821807793208531

	Training acc: 0.807074124555212

	Training prec: 0.4111173154696828

	Training rec: 0.25476479141993513

	Training f1: 0.2618784568571207

	Current Learning rate:  0.00045714285714285713



 10%|▉         | 2/21 [00:00<00:02,  8.39it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.23it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.30it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.23it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  7.91it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.06it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.15it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.21it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.26it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.27it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.26it/s][A
 62%|██████▏   | 13/21 [00:01<00:01,  7.88it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  7.90it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.02it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.16it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.27it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.34it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.25it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.24it/s][A
Epoch:  54%|█████▍    | 19/35 [11:18<0


	Validation Loss: 0.6813450909796215

	Validation acc: 0.8020172780594604

	Validation prec: 0.34494571870612106

	Validation rec: 0.27423519895605625

	Validation f1: 0.27638169405239893



  2%|▏         | 1/42 [00:00<00:31,  1.30it/s][A

loss: tensor(0.6096, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.28it/s][A

loss: tensor(0.7373, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.23it/s][A

loss: tensor(0.8216, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.25it/s][A

loss: tensor(0.6355, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.26it/s][A

loss: tensor(0.7627, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.26it/s][A

loss: tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.22it/s][A

loss: tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.23it/s][A

loss: tensor(0.7839, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.24it/s][A

loss: tensor(0.6754, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.24it/s][A

loss: tensor(0.6632, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.21it/s][A

loss: tensor(0.8104, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.23it/s][A

loss: tensor(0.6984, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.24it/s][A

loss: tensor(0.7303, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.25it/s][A

loss: tensor(0.7254, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.22it/s][A

loss: tensor(0.6478, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.25it/s][A

loss: tensor(0.8320, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.25it/s][A

loss: tensor(0.6087, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.22it/s][A

loss: tensor(0.6412, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.6774, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.25it/s][A

loss: tensor(0.7891, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.24it/s][A

loss: tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.22it/s][A

loss: tensor(0.6825, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.24it/s][A

loss: tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.25it/s][A

loss: tensor(0.7140, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.26it/s][A

loss: tensor(0.6659, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.21it/s][A

loss: tensor(0.6042, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.23it/s][A

loss: tensor(0.6662, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.24it/s][A

loss: tensor(0.6731, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.24it/s][A

loss: tensor(0.5511, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.20it/s][A

loss: tensor(0.6276, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:09,  1.21it/s][A

loss: tensor(0.6393, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.22it/s][A

loss: tensor(0.6834, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.23it/s][A

loss: tensor(0.4755, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.19it/s][A

loss: tensor(0.5658, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.21it/s][A

loss: tensor(0.5957, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.22it/s][A

loss: tensor(0.5767, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.23it/s][A

loss: tensor(0.6828, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.20it/s][A

loss: tensor(0.7998, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.22it/s][A

loss: tensor(0.6564, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.23it/s][A

loss: tensor(0.6685, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.22it/s][A

loss: tensor(0.6852, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.24it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.68it/s][A

loss: tensor(1.0453, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6818990345512118

	Training acc: 0.80644689735255

	Training prec: 0.41341285903826486

	Training rec: 0.255883532357564

	Training f1: 0.26215546450264343

	Current Learning rate:  0.00042857142857142855



 10%|▉         | 2/21 [00:00<00:02,  8.13it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.21it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.63it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.94it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  9.02it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  9.12it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  9.21it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.69it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.63it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.79it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.92it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.86it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.77it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.76it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.74it/s][A
 81%|████████  | 17/21 [00:01<00:00,  8.28it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.35it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.47it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.75it/s][A
Epoch:  57%|█████▋    | 20/35 [11:54<0


	Validation Loss: 0.6914529204368591

	Validation acc: 0.7979043036168152

	Validation prec: 0.36339334806573104

	Validation rec: 0.27432854911585847

	Validation f1: 0.27708249991516276



  2%|▏         | 1/42 [00:00<00:32,  1.26it/s][A

loss: tensor(0.5110, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.27it/s][A

loss: tensor(0.7597, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.27it/s][A

loss: tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:31,  1.22it/s][A

loss: tensor(0.7748, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:29,  1.23it/s][A

loss: tensor(0.6443, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.25it/s][A

loss: tensor(0.7176, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.26it/s][A

loss: tensor(0.7239, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.22it/s][A

loss: tensor(0.6899, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.23it/s][A

loss: tensor(0.5575, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.21it/s][A

loss: tensor(0.7198, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.20it/s][A

loss: tensor(0.9892, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.23it/s][A

loss: tensor(0.6336, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.25it/s][A

loss: tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.26it/s][A

loss: tensor(0.5491, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:21,  1.23it/s][A

loss: tensor(0.8823, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.24it/s][A

loss: tensor(0.6642, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.25it/s][A

loss: tensor(0.7017, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.26it/s][A

loss: tensor(0.5801, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.8244, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.24it/s][A

loss: tensor(0.7630, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.25it/s][A

loss: tensor(0.6224, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.26it/s][A

loss: tensor(0.7604, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.23it/s][A

loss: tensor(0.5898, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.24it/s][A

loss: tensor(0.5520, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.25it/s][A

loss: tensor(0.7202, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.26it/s][A

loss: tensor(0.6289, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.23it/s][A

loss: tensor(0.7651, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.24it/s][A

loss: tensor(0.5993, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.25it/s][A

loss: tensor(0.6850, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.26it/s][A

loss: tensor(0.6331, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:09,  1.22it/s][A

loss: tensor(0.6453, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.24it/s][A

loss: tensor(0.6743, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.25it/s][A

loss: tensor(0.4703, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.27it/s][A

loss: tensor(0.5585, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.22it/s][A

loss: tensor(0.7723, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.25it/s][A

loss: tensor(0.7796, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.25it/s][A

loss: tensor(0.6842, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.28it/s][A

loss: tensor(0.9821, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.25it/s][A

loss: tensor(0.5798, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.26it/s][A

loss: tensor(0.6014, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.26it/s][A

loss: tensor(0.7353, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.26it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.57it/s][A

loss: tensor(0.5775, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6807639861390704

	Training acc: 0.8060563082108908

	Training prec: 0.40240537846801805

	Training rec: 0.26278826896927565

	Training f1: 0.2688693809094383

	Current Learning rate:  0.0004



 10%|▉         | 2/21 [00:00<00:02,  8.49it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.41it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.42it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.45it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.49it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.33it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.08it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.22it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.31it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.39it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.43it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.40it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  7.97it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.13it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.00it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.28it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.58it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.78it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  8.96it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.6941457390785217

	Validation acc: 0.7963585780803358

	Validation prec: 0.32051252460467705

	Validation rec: 0.2566196727633239

	Validation f1: 0.25504199441602965



  2%|▏         | 1/42 [00:00<00:31,  1.31it/s][A

loss: tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:30,  1.29it/s][A

loss: tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.29it/s][A

loss: tensor(1.0449, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.23it/s][A

loss: tensor(0.6120, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.24it/s][A

loss: tensor(0.5349, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.25it/s][A

loss: tensor(0.6939, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.27it/s][A

loss: tensor(0.7627, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.24it/s][A

loss: tensor(0.6179, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.25it/s][A

loss: tensor(0.6021, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:25,  1.26it/s][A

loss: tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.26it/s][A

loss: tensor(0.7207, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.23it/s][A

loss: tensor(0.6762, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:24,  1.21it/s][A

loss: tensor(0.6936, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.24it/s][A

loss: tensor(0.6017, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:11<00:21,  1.27it/s][A

loss: tensor(0.8905, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:21,  1.24it/s][A

loss: tensor(0.5542, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.26it/s][A

loss: tensor(0.7739, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:18,  1.26it/s][A

loss: tensor(0.7652, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.26it/s][A

loss: tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.23it/s][A

loss: tensor(0.5596, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.24it/s][A

loss: tensor(0.6315, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.25it/s][A

loss: tensor(0.6376, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.26it/s][A

loss: tensor(0.6476, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.22it/s][A

loss: tensor(0.8338, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.24it/s][A

loss: tensor(0.6101, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.25it/s][A

loss: tensor(0.6884, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.26it/s][A

loss: tensor(0.6861, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.22it/s][A

loss: tensor(0.6882, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.23it/s][A

loss: tensor(0.6517, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.24it/s][A

loss: tensor(0.6035, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.25it/s][A

loss: tensor(0.7097, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.21it/s][A

loss: tensor(0.6701, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.23it/s][A

loss: tensor(0.7339, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.24it/s][A

loss: tensor(0.6531, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.26it/s][A

loss: tensor(0.6774, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.22it/s][A

loss: tensor(0.7531, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.25it/s][A

loss: tensor(0.5428, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.26it/s][A

loss: tensor(0.6637, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.24it/s][A

loss: tensor(0.6250, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.25it/s][A

loss: tensor(0.7313, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.27it/s][A

loss: tensor(0.5838, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.26it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  9.49it/s][A

loss: tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6763946612675985

	Training acc: 0.8067745141749916

	Training prec: 0.39466822336924456

	Training rec: 0.2566038817312536

	Training f1: 0.264062095957476

	Current Learning rate:  0.00037142857142857143



 10%|▉         | 2/21 [00:00<00:02,  9.50it/s][A
 14%|█▍        | 3/21 [00:00<00:01,  9.41it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  9.40it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  9.36it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.92it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.79it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.70it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.65it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.59it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.54it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.51it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.47it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.06it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  7.99it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.08it/s][A
 81%|████████  | 17/21 [00:01<00:00,  8.17it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.25it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.29it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  7.97it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.7060630463418507

	Validation acc: 0.7961746433117031

	Validation prec: 0.3032341971369025

	Validation rec: 0.25275441411782773

	Validation f1: 0.24958941634369344



  2%|▏         | 1/42 [00:00<00:35,  1.15it/s][A

loss: tensor(0.6671, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:33,  1.20it/s][A

loss: tensor(0.5750, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.23it/s][A

loss: tensor(0.6585, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.23it/s][A

loss: tensor(0.8353, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:31,  1.19it/s][A

loss: tensor(0.7412, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.21it/s][A

loss: tensor(0.7784, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.22it/s][A

loss: tensor(0.5957, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.22it/s][A

loss: tensor(0.8038, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:28,  1.15it/s][A

loss: tensor(0.6863, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.19it/s][A

loss: tensor(0.7700, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:09<00:25,  1.22it/s][A

loss: tensor(0.6063, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.23it/s][A

loss: tensor(0.7067, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:24,  1.20it/s][A

loss: tensor(0.7188, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:23,  1.21it/s][A

loss: tensor(0.7307, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.22it/s][A

loss: tensor(0.7133, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:21,  1.23it/s][A

loss: tensor(0.6055, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:14<00:20,  1.21it/s][A

loss: tensor(0.6798, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.23it/s][A

loss: tensor(0.5429, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.6717, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.23it/s][A

loss: tensor(0.6504, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.20it/s][A

loss: tensor(0.7198, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:18<00:16,  1.21it/s][A

loss: tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.23it/s][A

loss: tensor(0.6976, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.24it/s][A

loss: tensor(0.7167, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:14,  1.20it/s][A

loss: tensor(0.6503, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.21it/s][A

loss: tensor(0.6361, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:22<00:12,  1.22it/s][A

loss: tensor(0.7151, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:23<00:11,  1.23it/s][A

loss: tensor(0.6917, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.19it/s][A

loss: tensor(0.8619, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.20it/s][A

loss: tensor(0.5406, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:09,  1.21it/s][A

loss: tensor(0.6497, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.19it/s][A

loss: tensor(0.7608, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:27<00:07,  1.20it/s][A

loss: tensor(0.7411, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:28<00:06,  1.21it/s][A

loss: tensor(0.6279, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.22it/s][A

loss: tensor(0.6737, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:05,  1.19it/s][A

loss: tensor(0.5987, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.20it/s][A

loss: tensor(0.7100, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:31<00:03,  1.21it/s][A

loss: tensor(0.5174, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:32<00:02,  1.22it/s][A

loss: tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:33<00:01,  1.19it/s][A

loss: tensor(0.6153, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.20it/s][A

loss: tensor(0.7003, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:34<00:00,  1.23it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.08it/s][A

loss: tensor(0.5242, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6757635530971345

	Training acc: 0.8084643617978452

	Training prec: 0.41623337314844006

	Training rec: 0.26057960199471975

	Training f1: 0.2697322739259725

	Current Learning rate:  0.00034285714285714285



 10%|▉         | 2/21 [00:00<00:02,  7.54it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  7.93it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.08it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.16it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.21it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.21it/s][A
 38%|███▊      | 8/21 [00:01<00:01,  7.57it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  7.96it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.26it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.47it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.63it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.74it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.80it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.86it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.87it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.42it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.61it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.75it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.47it/s][A
Epoch:  66%|██████▌   | 23/35 [13:42<0


	Validation Loss: 0.7108249352091834

	Validation acc: 0.7954589390387972

	Validation prec: 0.3027870191378553

	Validation rec: 0.24330354691537387

	Validation f1: 0.23956510051982619



  2%|▏         | 1/42 [00:00<00:33,  1.22it/s][A

loss: tensor(0.6590, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:34,  1.17it/s][A

loss: tensor(0.6683, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:32,  1.21it/s][A

loss: tensor(0.6459, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.23it/s][A

loss: tensor(0.7015, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.20it/s][A

loss: tensor(0.5937, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:05<00:30,  1.18it/s][A

loss: tensor(0.7197, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.21it/s][A

loss: tensor(0.6567, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.23it/s][A

loss: tensor(0.7010, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.24it/s][A

loss: tensor(0.6024, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.20it/s][A

loss: tensor(0.5517, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:09<00:25,  1.22it/s][A

loss: tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.23it/s][A

loss: tensor(0.7562, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.24it/s][A

loss: tensor(0.7568, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:23,  1.21it/s][A

loss: tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.23it/s][A

loss: tensor(0.7638, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:21,  1.23it/s][A

loss: tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.24it/s][A

loss: tensor(0.6594, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.21it/s][A

loss: tensor(0.8478, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.22it/s][A

loss: tensor(0.5481, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.23it/s][A

loss: tensor(0.6203, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.21it/s][A

loss: tensor(0.5582, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:18<00:16,  1.22it/s][A

loss: tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.23it/s][A

loss: tensor(0.7085, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.24it/s][A

loss: tensor(0.6220, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:14,  1.17it/s][A

loss: tensor(0.8103, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.21it/s][A

loss: tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:22<00:12,  1.24it/s][A

loss: tensor(0.6595, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.25it/s][A

loss: tensor(0.5670, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.21it/s][A

loss: tensor(0.6482, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.23it/s][A

loss: tensor(0.6515, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.24it/s][A

loss: tensor(0.5888, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.24it/s][A

loss: tensor(0.6200, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:27<00:07,  1.21it/s][A

loss: tensor(0.6819, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.23it/s][A

loss: tensor(0.8204, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.24it/s][A

loss: tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.25it/s][A

loss: tensor(0.5699, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.22it/s][A

loss: tensor(0.7484, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:31<00:03,  1.23it/s][A

loss: tensor(0.7691, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.25it/s][A

loss: tensor(0.6819, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.26it/s][A

loss: tensor(0.7509, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.22it/s][A

loss: tensor(0.6611, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.24it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.44it/s][A

loss: tensor(0.7258, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6721765271254948

	Training acc: 0.8091899702323753

	Training prec: 0.40117415746037804

	Training rec: 0.2578328506197318

	Training f1: 0.2650888339357825

	Current Learning rate:  0.00031428571428571427



 10%|▉         | 2/21 [00:00<00:02,  8.28it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.36it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.39it/s][A
 24%|██▍       | 5/21 [00:00<00:02,  7.88it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.05it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.17it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.26it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.31it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.33it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  7.93it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  7.93it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.08it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.18it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.25it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.29it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.31it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.35it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  7.95it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.26it/s][A
Epoch:  69%|██████▊   | 24/35 [14:18<0


	Validation Loss: 0.6862387032735915

	Validation acc: 0.7989099134515559

	Validation prec: 0.33783088907230063

	Validation rec: 0.2647768397950364

	Validation f1: 0.26637461125467843



  2%|▏         | 1/42 [00:00<00:32,  1.26it/s][A

loss: tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.26it/s][A

loss: tensor(0.7618, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:32,  1.20it/s][A

loss: tensor(0.5326, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.23it/s][A

loss: tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:29,  1.24it/s][A

loss: tensor(0.7523, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.26it/s][A

loss: tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.22it/s][A

loss: tensor(0.6935, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.25it/s][A

loss: tensor(0.6036, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.25it/s][A

loss: tensor(0.7762, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.25it/s][A

loss: tensor(0.6082, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.22it/s][A

loss: tensor(0.7394, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.23it/s][A

loss: tensor(0.6473, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.24it/s][A

loss: tensor(0.7845, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.24it/s][A

loss: tensor(0.5867, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.21it/s][A

loss: tensor(0.6376, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.24it/s][A

loss: tensor(0.7236, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.25it/s][A

loss: tensor(0.7114, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.22it/s][A

loss: tensor(0.7673, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.7492, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.24it/s][A

loss: tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.25it/s][A

loss: tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.22it/s][A

loss: tensor(0.8870, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.25it/s][A

loss: tensor(0.7194, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.24it/s][A

loss: tensor(0.6403, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.25it/s][A

loss: tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.22it/s][A

loss: tensor(0.5565, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.23it/s][A

loss: tensor(0.9468, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.25it/s][A

loss: tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.25it/s][A

loss: tensor(0.5119, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.22it/s][A

loss: tensor(0.7449, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.24it/s][A

loss: tensor(0.7597, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.25it/s][A

loss: tensor(0.7274, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.25it/s][A

loss: tensor(0.5626, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.22it/s][A

loss: tensor(0.7676, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.24it/s][A

loss: tensor(0.5823, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.24it/s][A

loss: tensor(0.6480, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.25it/s][A

loss: tensor(0.7992, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.22it/s][A

loss: tensor(0.6077, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.23it/s][A

loss: tensor(0.7283, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.24it/s][A

loss: tensor(0.6836, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.25it/s][A

loss: tensor(0.5621, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.25it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.99it/s][A

loss: tensor(0.5970, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.681226501862208

	Training acc: 0.8066178149074206

	Training prec: 0.4031462474326549

	Training rec: 0.2577708709310536

	Training f1: 0.26465059049603845

	Current Learning rate:  0.0002857142857142857



 10%|▉         | 2/21 [00:00<00:02,  7.89it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  7.56it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  7.83it/s][A
 24%|██▍       | 5/21 [00:00<00:02,  7.98it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.13it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.21it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.21it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.20it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  7.76it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  7.88it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.01it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.06it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.16it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.23it/s][A
 76%|███████▌  | 16/21 [00:02<00:00,  7.84it/s][A
 81%|████████  | 17/21 [00:02<00:00,  7.73it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  7.93it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.00it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.10it/s][A
Epoch:  71%|███████▏  | 25/35 [14:54<0


	Validation Loss: 0.6950230882281349

	Validation acc: 0.7974663118346634

	Validation prec: 0.30709621045533053

	Validation rec: 0.2626069467935056

	Validation f1: 0.26072761810000344



  2%|▏         | 1/42 [00:00<00:32,  1.28it/s][A

loss: tensor(0.8138, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.27it/s][A

loss: tensor(0.7870, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.27it/s][A

loss: tensor(0.8183, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:31,  1.22it/s][A

loss: tensor(0.6211, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:29,  1.24it/s][A

loss: tensor(0.6061, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.24it/s][A

loss: tensor(0.6854, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.25it/s][A

loss: tensor(0.7827, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:28,  1.21it/s][A

loss: tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.23it/s][A

loss: tensor(0.6013, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.24it/s][A

loss: tensor(0.6847, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.22it/s][A

loss: tensor(0.6754, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.7328, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.25it/s][A

loss: tensor(0.6555, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.26it/s][A

loss: tensor(0.6945, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.23it/s][A

loss: tensor(0.6554, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.25it/s][A

loss: tensor(0.6051, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.26it/s][A

loss: tensor(0.5432, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.26it/s][A

loss: tensor(0.7312, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.22it/s][A

loss: tensor(0.7345, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.24it/s][A

loss: tensor(0.6273, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.25it/s][A

loss: tensor(0.8105, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.26it/s][A

loss: tensor(0.6404, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.22it/s][A

loss: tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.24it/s][A

loss: tensor(0.5370, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.24it/s][A

loss: tensor(0.7035, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.25it/s][A

loss: tensor(0.6862, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.21it/s][A

loss: tensor(0.7534, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.23it/s][A

loss: tensor(0.7148, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.24it/s][A

loss: tensor(0.6192, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.25it/s][A

loss: tensor(0.6931, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:09,  1.21it/s][A

loss: tensor(0.7105, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.23it/s][A

loss: tensor(0.6163, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.24it/s][A

loss: tensor(0.5910, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.24it/s][A

loss: tensor(0.7750, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.21it/s][A

loss: tensor(0.7261, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.23it/s][A

loss: tensor(0.6372, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:04,  1.23it/s][A

loss: tensor(0.5694, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.24it/s][A

loss: tensor(0.5468, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.22it/s][A

loss: tensor(0.6468, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.24it/s][A

loss: tensor(0.6546, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.26it/s][A

loss: tensor(0.6448, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.25it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.47it/s][A

loss: tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6724501181216467

	Training acc: 0.808114617266648

	Training prec: 0.41198606158235895

	Training rec: 0.2534402854036778

	Training f1: 0.2600702927399464

	Current Learning rate:  0.0002571428571428571



 10%|▉         | 2/21 [00:00<00:02,  8.40it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.38it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  7.80it/s][A
 24%|██▍       | 5/21 [00:00<00:02,  7.77it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  7.96it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.12it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.20it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.27it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.31it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.30it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.01it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.13it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.24it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  7.89it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.11it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.30it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.39it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.11it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  8.30it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.700452290830158

	Validation acc: 0.7973765139693705

	Validation prec: 0.2849577099342855

	Validation rec: 0.2501028001713313

	Validation f1: 0.24534725439424046



  2%|▏         | 1/42 [00:00<00:32,  1.28it/s][A

loss: tensor(0.6827, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.27it/s][A

loss: tensor(0.6120, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.27it/s][A

loss: tensor(0.8248, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:31,  1.22it/s][A

loss: tensor(0.6982, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.23it/s][A

loss: tensor(0.6973, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.24it/s][A

loss: tensor(0.6283, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.27it/s][A

loss: tensor(0.6133, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.23it/s][A

loss: tensor(0.4938, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.25it/s][A

loss: tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.25it/s][A

loss: tensor(0.6250, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:24,  1.25it/s][A

loss: tensor(0.6562, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.22it/s][A

loss: tensor(0.7095, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.23it/s][A

loss: tensor(0.6540, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.24it/s][A

loss: tensor(0.6191, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:21,  1.24it/s][A

loss: tensor(0.6471, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:21,  1.21it/s][A

loss: tensor(0.7140, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.22it/s][A

loss: tensor(0.6119, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.24it/s][A

loss: tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.24it/s][A

loss: tensor(0.7380, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:18,  1.21it/s][A

loss: tensor(0.6277, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:17,  1.23it/s][A

loss: tensor(0.7254, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.24it/s][A

loss: tensor(0.6283, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.24it/s][A

loss: tensor(0.7558, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.21it/s][A

loss: tensor(0.8400, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.22it/s][A

loss: tensor(0.7920, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.23it/s][A

loss: tensor(0.5745, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.25it/s][A

loss: tensor(0.6304, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.23it/s][A

loss: tensor(0.6570, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.24it/s][A

loss: tensor(0.6347, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.24it/s][A

loss: tensor(0.6714, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.25it/s][A

loss: tensor(0.6666, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.22it/s][A

loss: tensor(0.5791, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.24it/s][A

loss: tensor(0.6131, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.24it/s][A

loss: tensor(0.7335, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.25it/s][A

loss: tensor(0.6441, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.21it/s][A

loss: tensor(0.8110, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:04,  1.23it/s][A

loss: tensor(0.6172, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.24it/s][A

loss: tensor(0.7231, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.21it/s][A

loss: tensor(0.7196, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.23it/s][A

loss: tensor(0.7505, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.24it/s][A

loss: tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.25it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.84it/s][A

loss: tensor(0.7811, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6744551325128192

	Training acc: 0.8074702716011664

	Training prec: 0.4130670620027919

	Training rec: 0.2587109305341007

	Training f1: 0.26733474377462557

	Current Learning rate:  0.00022857142857142857



 10%|▉         | 2/21 [00:00<00:02,  8.15it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.28it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.33it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.36it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.15it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  7.96it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.13it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.24it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.30it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.32it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.35it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.37it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  7.90it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  7.92it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.09it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.20it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.22it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.24it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  7.90it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.6893255384195418

	Validation acc: 0.7981341496886287

	Validation prec: 0.31718142665547233

	Validation rec: 0.26192232358529577

	Validation f1: 0.25997327517449



  2%|▏         | 1/42 [00:00<00:35,  1.16it/s][A

loss: tensor(0.7223, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:32,  1.24it/s][A

loss: tensor(0.7852, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.26it/s][A

loss: tensor(0.8319, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.26it/s][A

loss: tensor(0.7683, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.22it/s][A

loss: tensor(0.7206, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.23it/s][A

loss: tensor(0.6134, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.24it/s][A

loss: tensor(0.6141, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.25it/s][A

loss: tensor(0.7189, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:27,  1.21it/s][A

loss: tensor(0.7002, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.23it/s][A

loss: tensor(0.7282, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.23it/s][A

loss: tensor(0.7616, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.6024, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:24,  1.21it/s][A

loss: tensor(0.5952, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.22it/s][A

loss: tensor(0.6614, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.21it/s][A

loss: tensor(0.6384, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:21,  1.24it/s][A

loss: tensor(0.6957, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.22it/s][A

loss: tensor(0.6302, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.23it/s][A

loss: tensor(0.7433, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.5575, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.24it/s][A

loss: tensor(0.5969, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.21it/s][A

loss: tensor(0.5856, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.24it/s][A

loss: tensor(0.6419, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.25it/s][A

loss: tensor(0.7854, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.25it/s][A

loss: tensor(0.7245, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:14,  1.21it/s][A

loss: tensor(0.7399, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.23it/s][A

loss: tensor(0.6189, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.23it/s][A

loss: tensor(0.7692, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.24it/s][A

loss: tensor(0.6030, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.21it/s][A

loss: tensor(0.4989, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.22it/s][A

loss: tensor(0.6452, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.23it/s][A

loss: tensor(0.8820, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.20it/s][A

loss: tensor(0.5881, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.21it/s][A

loss: tensor(0.5280, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.22it/s][A

loss: tensor(0.6663, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.23it/s][A

loss: tensor(0.6752, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:05,  1.20it/s][A

loss: tensor(0.6853, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.22it/s][A

loss: tensor(0.6814, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.23it/s][A

loss: tensor(0.4609, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.24it/s][A

loss: tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.21it/s][A

loss: tensor(0.6813, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.23it/s][A

loss: tensor(0.7549, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.24it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.48it/s][A

loss: tensor(0.5137, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6685387314785094

	Training acc: 0.8103453371520999

	Training prec: 0.43870729905718137

	Training rec: 0.26281425309593626

	Training f1: 0.2725380597220358

	Current Learning rate:  0.0002



 10%|▉         | 2/21 [00:00<00:02,  8.39it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.39it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  7.89it/s][A
 24%|██▍       | 5/21 [00:00<00:02,  7.96it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.33it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.50it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.64it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.71it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.77it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.49it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.38it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.58it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.73it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.79it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.85it/s][A
 81%|████████  | 17/21 [00:01<00:00,  8.88it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.87it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.81it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.64it/s][A
Epoch:  80%|████████  | 28/35 [16:43<0


	Validation Loss: 0.694190821477345

	Validation acc: 0.7979033547561888

	Validation prec: 0.3007118380830686

	Validation rec: 0.2528809304932898

	Validation f1: 0.24955303276462126



  2%|▏         | 1/42 [00:00<00:32,  1.26it/s][A

loss: tensor(0.6037, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:33,  1.19it/s][A

loss: tensor(0.7163, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.22it/s][A

loss: tensor(0.7409, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.24it/s][A

loss: tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:29,  1.25it/s][A

loss: tensor(0.5604, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.22it/s][A

loss: tensor(0.5835, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.23it/s][A

loss: tensor(0.7633, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.24it/s][A

loss: tensor(0.6068, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.24it/s][A

loss: tensor(0.6017, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.21it/s][A

loss: tensor(0.6636, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.22it/s][A

loss: tensor(0.5199, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.23it/s][A

loss: tensor(0.6739, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.24it/s][A

loss: tensor(0.6105, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:23,  1.21it/s][A

loss: tensor(0.6670, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.22it/s][A

loss: tensor(0.6972, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:21,  1.23it/s][A

loss: tensor(0.5053, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.24it/s][A

loss: tensor(0.7110, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.21it/s][A

loss: tensor(0.6491, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.24it/s][A

loss: tensor(0.6582, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.26it/s][A

loss: tensor(0.7459, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.23it/s][A

loss: tensor(0.7280, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.25it/s][A

loss: tensor(0.6690, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.25it/s][A

loss: tensor(0.6777, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.26it/s][A

loss: tensor(0.9219, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:14,  1.20it/s][A

loss: tensor(0.7094, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.23it/s][A

loss: tensor(0.7044, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.24it/s][A

loss: tensor(0.5535, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.25it/s][A

loss: tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.21it/s][A

loss: tensor(0.6831, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.23it/s][A

loss: tensor(0.7457, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.24it/s][A

loss: tensor(0.7415, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.25it/s][A

loss: tensor(0.7355, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.21it/s][A

loss: tensor(0.8422, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.23it/s][A

loss: tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.24it/s][A

loss: tensor(0.6382, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.24it/s][A

loss: tensor(0.6049, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.21it/s][A

loss: tensor(0.6724, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.23it/s][A

loss: tensor(0.7851, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.25it/s][A

loss: tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.25it/s][A

loss: tensor(0.7471, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.23it/s][A

loss: tensor(0.6089, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.25it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.34it/s][A

loss: tensor(0.5632, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6713951258432298

	Training acc: 0.810930147107091

	Training prec: 0.4613397600371062

	Training rec: 0.26515642593665284

	Training f1: 0.2772229988822929

	Current Learning rate:  0.00017142857142857143



 10%|▉         | 2/21 [00:00<00:02,  8.31it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.36it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  7.95it/s][A
 24%|██▍       | 5/21 [00:00<00:02,  7.93it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.12it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.23it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.28it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.32it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.37it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.39it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  7.97it/s][A
 62%|██████▏   | 13/21 [00:01<00:01,  7.96it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.11it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.19it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.26it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.30it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  7.94it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  7.94it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.24it/s][A
Epoch:  83%|████████▎ | 29/35 [17:19<0


	Validation Loss: 0.6812756728558313

	Validation acc: 0.8024845332101184

	Validation prec: 0.31500501722540136

	Validation rec: 0.25913379560547156

	Validation f1: 0.2582222116299625



  2%|▏         | 1/42 [00:00<00:32,  1.25it/s][A

loss: tensor(0.6869, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.26it/s][A

loss: tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:32,  1.21it/s][A

loss: tensor(0.7681, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.23it/s][A

loss: tensor(0.5520, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:29,  1.25it/s][A

loss: tensor(0.6284, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.26it/s][A

loss: tensor(0.6534, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.22it/s][A

loss: tensor(0.6858, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.24it/s][A

loss: tensor(0.7652, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.25it/s][A

loss: tensor(0.6638, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.25it/s][A

loss: tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.21it/s][A

loss: tensor(0.5452, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.23it/s][A

loss: tensor(0.6510, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.24it/s][A

loss: tensor(0.5564, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.25it/s][A

loss: tensor(0.9532, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.21it/s][A

loss: tensor(0.5403, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:21,  1.23it/s][A

loss: tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.24it/s][A

loss: tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.21it/s][A

loss: tensor(0.5885, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.23it/s][A

loss: tensor(0.7413, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.24it/s][A

loss: tensor(0.7268, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.25it/s][A

loss: tensor(0.6146, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.22it/s][A

loss: tensor(0.6531, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.25it/s][A

loss: tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.26it/s][A

loss: tensor(0.6982, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.26it/s][A

loss: tensor(0.5900, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.23it/s][A

loss: tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.24it/s][A

loss: tensor(0.6163, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.24it/s][A

loss: tensor(0.7816, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.26it/s][A

loss: tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.23it/s][A

loss: tensor(0.7254, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.24it/s][A

loss: tensor(0.5971, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.25it/s][A

loss: tensor(0.7499, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.26it/s][A

loss: tensor(0.5609, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.21it/s][A

loss: tensor(0.6674, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.23it/s][A

loss: tensor(0.6897, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.24it/s][A

loss: tensor(0.6899, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.26it/s][A

loss: tensor(0.7532, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.23it/s][A

loss: tensor(0.5744, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.24it/s][A

loss: tensor(0.6805, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.26it/s][A

loss: tensor(0.6624, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.27it/s][A

loss: tensor(0.7929, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.26it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.79it/s][A

loss: tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6706568258149284

	Training acc: 0.8079676742735561

	Training prec: 0.4049329528083239

	Training rec: 0.25987763561921834

	Training f1: 0.2672071571305904

	Current Learning rate:  0.00014285714285714284



 10%|▉         | 2/21 [00:00<00:02,  8.18it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.15it/s][A
 19%|█▉        | 4/21 [00:00<00:01,  8.64it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.94it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.41it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.55it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.81it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  9.04it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  9.16it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  9.23it/s][A
 57%|█████▋    | 12/21 [00:01<00:00,  9.24it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.61it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.86it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  9.04it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  9.15it/s][A
 81%|████████  | 17/21 [00:01<00:00,  9.27it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  9.34it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  9.36it/s][A
100%|██████████| 21/21 [00:02<00:00,  9.00it/s][A
Epoch:  86%|████████▌ | 30/35 [17:55<0


	Validation Loss: 0.6923079547427949

	Validation acc: 0.7963803878567104

	Validation prec: 0.31694653698447783

	Validation rec: 0.25953259982017485

	Validation f1: 0.25908485001738174



  2%|▏         | 1/42 [00:00<00:31,  1.29it/s][A

loss: tensor(0.5299, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:30,  1.30it/s][A

loss: tensor(0.6394, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.28it/s][A

loss: tensor(0.5530, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.23it/s][A

loss: tensor(0.7212, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.25it/s][A

loss: tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.26it/s][A

loss: tensor(0.6431, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.26it/s][A

loss: tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.22it/s][A

loss: tensor(0.5498, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.24it/s][A

loss: tensor(0.5841, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:25,  1.25it/s][A

loss: tensor(0.6378, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.23it/s][A

loss: tensor(0.6405, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.8139, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.25it/s][A

loss: tensor(0.8314, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.26it/s][A

loss: tensor(0.8958, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:21,  1.23it/s][A

loss: tensor(0.7296, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.25it/s][A

loss: tensor(0.6156, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.27it/s][A

loss: tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:18,  1.28it/s][A

loss: tensor(0.6277, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.25it/s][A

loss: tensor(0.6691, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:15<00:17,  1.27it/s][A

loss: tensor(0.6738, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.27it/s][A

loss: tensor(0.7964, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:15,  1.27it/s][A

loss: tensor(0.7627, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.24it/s][A

loss: tensor(0.5673, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.25it/s][A

loss: tensor(0.5197, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:19<00:13,  1.26it/s][A

loss: tensor(0.6004, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.27it/s][A

loss: tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.25it/s][A

loss: tensor(0.8582, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.26it/s][A

loss: tensor(0.7445, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.26it/s][A

loss: tensor(0.7480, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:23<00:09,  1.27it/s][A

loss: tensor(0.7408, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.23it/s][A

loss: tensor(0.5999, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.24it/s][A

loss: tensor(0.7041, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.26it/s][A

loss: tensor(0.6864, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.27it/s][A

loss: tensor(0.6513, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:27<00:05,  1.24it/s][A

loss: tensor(0.6763, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.25it/s][A

loss: tensor(0.5897, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.26it/s][A

loss: tensor(0.7187, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.26it/s][A

loss: tensor(0.7428, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.22it/s][A

loss: tensor(0.7693, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:31<00:01,  1.24it/s][A

loss: tensor(0.4975, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.25it/s][A

loss: tensor(0.8534, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.27it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.14it/s][A

loss: tensor(0.7651, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6794106513261795

	Training acc: 0.8077047837492289

	Training prec: 0.3985365346949167

	Training rec: 0.26333858744547495

	Training f1: 0.2722956670439326

	Current Learning rate:  0.00011428571428571428



 10%|▉         | 2/21 [00:00<00:02,  7.74it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.06it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.24it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.33it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.39it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.19it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.04it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.17it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.25it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.31it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.34it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.39it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.41it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.37it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.06it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.20it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.28it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.34it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  8.39it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.6832110825039092

	Validation acc: 0.8001299871457991

	Validation prec: 0.3257884066935545

	Validation rec: 0.26171347467831485

	Validation f1: 0.2612746872186057



  2%|▏         | 1/42 [00:00<00:31,  1.29it/s][A

loss: tensor(0.5701, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.28it/s][A

loss: tensor(0.6244, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:30,  1.28it/s][A

loss: tensor(0.7851, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.23it/s][A

loss: tensor(0.5325, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.24it/s][A

loss: tensor(0.7578, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.25it/s][A

loss: tensor(0.8115, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:27,  1.26it/s][A

loss: tensor(0.5265, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.22it/s][A

loss: tensor(0.9344, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.24it/s][A

loss: tensor(0.7697, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:25,  1.25it/s][A

loss: tensor(0.6402, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.23it/s][A

loss: tensor(0.6475, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.20it/s][A

loss: tensor(0.6289, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.22it/s][A

loss: tensor(0.6423, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.23it/s][A

loss: tensor(0.6752, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:21,  1.24it/s][A

loss: tensor(0.6485, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:21,  1.23it/s][A

loss: tensor(0.7104, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.24it/s][A

loss: tensor(0.7498, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.25it/s][A

loss: tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.25it/s][A

loss: tensor(0.7223, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:18,  1.21it/s][A

loss: tensor(0.6735, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:17,  1.23it/s][A

loss: tensor(0.6561, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.25it/s][A

loss: tensor(0.6693, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.25it/s][A

loss: tensor(0.6649, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.22it/s][A

loss: tensor(0.6522, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.24it/s][A

loss: tensor(0.6944, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:12,  1.25it/s][A

loss: tensor(0.6426, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:11,  1.26it/s][A

loss: tensor(0.6441, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.19it/s][A

loss: tensor(0.6866, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.22it/s][A

loss: tensor(0.5908, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.24it/s][A

loss: tensor(0.7495, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.25it/s][A

loss: tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.22it/s][A

loss: tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.24it/s][A

loss: tensor(0.5626, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.25it/s][A

loss: tensor(0.5972, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.26it/s][A

loss: tensor(0.6135, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.23it/s][A

loss: tensor(0.6914, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:04,  1.25it/s][A

loss: tensor(0.6570, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.25it/s][A

loss: tensor(0.8465, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.22it/s][A

loss: tensor(0.6202, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.23it/s][A

loss: tensor(0.6017, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.24it/s][A

loss: tensor(0.7030, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.25it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.28it/s][A

loss: tensor(0.6392, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6722204372996375

	Training acc: 0.8085848023238226

	Training prec: 0.40616927748486387

	Training rec: 0.2607359287599534

	Training f1: 0.26746806678410245

	Current Learning rate:  8.571428571428571e-05



 10%|▉         | 2/21 [00:00<00:02,  7.89it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.04it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.14it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.20it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.34it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  7.85it/s][A
 38%|███▊      | 8/21 [00:01<00:01,  7.76it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  7.91it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.08it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.07it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.14it/s][A
 62%|██████▏   | 13/21 [00:01<00:01,  7.71it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  7.84it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  7.98it/s][A
 76%|███████▌  | 16/21 [00:02<00:00,  8.04it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.11it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.08it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.05it/s][A
 95%|█████████▌| 20/21 [00:02<00:00,  7.84it/s][A
100%|██████████| 21/21 [00:02<00:00,  


	Validation Loss: 0.6880850195884705

	Validation acc: 0.7997686980353202

	Validation prec: 0.31848444459070807

	Validation rec: 0.25152963655062843

	Validation f1: 0.2502102961137811



  2%|▏         | 1/42 [00:00<00:36,  1.12it/s][A

loss: tensor(0.7436, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:33,  1.20it/s][A

loss: tensor(0.8040, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.23it/s][A

loss: tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.25it/s][A

loss: tensor(0.6739, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:04<00:30,  1.21it/s][A

loss: tensor(0.7455, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.23it/s][A

loss: tensor(0.7570, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.24it/s][A

loss: tensor(0.5328, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.24it/s][A

loss: tensor(0.6574, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:27,  1.21it/s][A

loss: tensor(0.5952, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.22it/s][A

loss: tensor(0.7243, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.23it/s][A

loss: tensor(0.6713, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:24,  1.20it/s][A

loss: tensor(0.6398, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.22it/s][A

loss: tensor(0.8035, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:21,  1.23it/s][A

loss: tensor(0.6718, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:20,  1.24it/s][A

loss: tensor(0.7258, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.21it/s][A

loss: tensor(0.6846, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.23it/s][A

loss: tensor(0.6505, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.25it/s][A

loss: tensor(0.8016, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.25it/s][A

loss: tensor(0.8560, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.22it/s][A

loss: tensor(0.7344, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.23it/s][A

loss: tensor(0.7252, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.24it/s][A

loss: tensor(0.5969, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.22it/s][A

loss: tensor(0.6922, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.22it/s][A

loss: tensor(0.6083, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:12,  1.24it/s][A

loss: tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.25it/s][A

loss: tensor(0.6554, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.25it/s][A

loss: tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.21it/s][A

loss: tensor(0.7526, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.22it/s][A

loss: tensor(0.5659, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:08,  1.23it/s][A

loss: tensor(0.5761, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.21it/s][A

loss: tensor(0.4919, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.22it/s][A

loss: tensor(0.6888, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.23it/s][A

loss: tensor(0.5385, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.24it/s][A

loss: tensor(0.5482, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.21it/s][A

loss: tensor(0.6744, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.22it/s][A

loss: tensor(0.6943, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.22it/s][A

loss: tensor(0.6665, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.23it/s][A

loss: tensor(0.6476, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.20it/s][A

loss: tensor(0.6114, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:33<00:00,  1.22it/s][A

loss: tensor(0.6977, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.24it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.10it/s][A

loss: tensor(0.9114, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6743121211017881

	Training acc: 0.8062341925454499

	Training prec: 0.4115089643454389

	Training rec: 0.2563469456612194

	Training f1: 0.26399613968282687

	Current Learning rate:  5.714285714285714e-05



 10%|▉         | 2/21 [00:00<00:02,  7.57it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.00it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.19it/s][A
 24%|██▍       | 5/21 [00:00<00:01,  8.27it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.37it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.36it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.43it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  7.96it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  7.98it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.15it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.25it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.32it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.38it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  7.90it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  7.86it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.03it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  7.84it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.25it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.29it/s][A
Epoch:  94%|█████████▍| 33/35 [19:43<0


	Validation Loss: 0.6906222786222186

	Validation acc: 0.7998276732221234

	Validation prec: 0.2959012343852227

	Validation rec: 0.24914884989437797

	Validation f1: 0.2456545680432339



  2%|▏         | 1/42 [00:00<00:30,  1.33it/s][A

loss: tensor(0.6790, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:32,  1.23it/s][A

loss: tensor(0.7310, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.25it/s][A

loss: tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.26it/s][A

loss: tensor(0.6437, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:29,  1.26it/s][A

loss: tensor(0.6674, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:29,  1.22it/s][A

loss: tensor(0.7523, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.23it/s][A

loss: tensor(0.6818, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.23it/s][A

loss: tensor(0.7201, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.23it/s][A

loss: tensor(0.5686, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:08<00:26,  1.19it/s][A

loss: tensor(0.7080, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.20it/s][A

loss: tensor(0.6078, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.20it/s][A

loss: tensor(0.8063, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.22it/s][A

loss: tensor(0.5781, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:23,  1.19it/s][A

loss: tensor(0.6992, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.21it/s][A

loss: tensor(0.7322, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:13<00:21,  1.22it/s][A

loss: tensor(0.7033, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:20,  1.22it/s][A

loss: tensor(0.6578, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:20,  1.18it/s][A

loss: tensor(0.7775, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:19,  1.19it/s][A

loss: tensor(0.6617, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:18,  1.20it/s][A

loss: tensor(0.6235, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:17<00:17,  1.18it/s][A

loss: tensor(0.7755, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:18<00:16,  1.19it/s][A

loss: tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.20it/s][A

loss: tensor(0.5630, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.21it/s][A

loss: tensor(0.5851, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:14,  1.18it/s][A

loss: tensor(0.7613, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:21<00:13,  1.20it/s][A

loss: tensor(0.5851, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:22<00:12,  1.20it/s][A

loss: tensor(0.5297, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:23<00:11,  1.21it/s][A

loss: tensor(0.7212, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:24<00:10,  1.18it/s][A

loss: tensor(0.7717, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:10,  1.19it/s][A

loss: tensor(0.5084, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:25<00:09,  1.20it/s][A

loss: tensor(0.5927, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:26<00:08,  1.21it/s][A

loss: tensor(0.9304, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:27<00:07,  1.18it/s][A

loss: tensor(0.7176, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:28<00:06,  1.20it/s][A

loss: tensor(0.5863, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.22it/s][A

loss: tensor(0.7067, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:29<00:04,  1.22it/s][A

loss: tensor(0.7140, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:30<00:04,  1.19it/s][A

loss: tensor(0.7347, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:31<00:03,  1.20it/s][A

loss: tensor(0.7684, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:32<00:02,  1.22it/s][A

loss: tensor(0.6353, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:33<00:01,  1.20it/s][A

loss: tensor(0.5621, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:34<00:00,  1.18it/s][A

loss: tensor(0.5244, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:34<00:00,  1.22it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  8.21it/s][A

loss: tensor(0.5677, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6701886057853699

	Training acc: 0.8094486337306037

	Training prec: 0.4303249047696198

	Training rec: 0.2631336796535186

	Training f1: 0.2726957978131422

	Current Learning rate:  2.857142857142857e-05



 10%|▉         | 2/21 [00:00<00:02,  8.21it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.33it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  7.89it/s][A
 24%|██▍       | 5/21 [00:00<00:02,  7.88it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.06it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.14it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.23it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.30it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.34it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  8.36it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  7.90it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.06it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.17it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.26it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.30it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.34it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  7.85it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  7.90it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.21it/s][A
Epoch:  97%|█████████▋| 34/35 [20:20<0


	Validation Loss: 0.6751891601653326

	Validation acc: 0.8025846342490544

	Validation prec: 0.3091556896653185

	Validation rec: 0.25706886340281226

	Validation f1: 0.25548524976155873



  2%|▏         | 1/42 [00:00<00:32,  1.27it/s][A

loss: tensor(0.5696, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▍         | 2/42 [00:01<00:31,  1.27it/s][A

loss: tensor(0.9039, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 3/42 [00:02<00:31,  1.23it/s][A

loss: tensor(0.7039, device='cuda:0', grad_fn=<NllLossBackward>)



 10%|▉         | 4/42 [00:03<00:30,  1.26it/s][A

loss: tensor(0.5723, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 5/42 [00:03<00:28,  1.28it/s][A

loss: tensor(0.6167, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 6/42 [00:04<00:28,  1.28it/s][A

loss: tensor(0.7999, device='cuda:0', grad_fn=<NllLossBackward>)



 17%|█▋        | 7/42 [00:05<00:28,  1.24it/s][A

loss: tensor(0.6977, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 8/42 [00:06<00:27,  1.25it/s][A

loss: tensor(0.5792, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██▏       | 9/42 [00:07<00:26,  1.24it/s][A

loss: tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)



 24%|██▍       | 10/42 [00:07<00:25,  1.26it/s][A

loss: tensor(0.6187, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▌       | 11/42 [00:08<00:25,  1.23it/s][A

loss: tensor(0.5990, device='cuda:0', grad_fn=<NllLossBackward>)



 29%|██▊       | 12/42 [00:09<00:24,  1.24it/s][A

loss: tensor(0.6822, device='cuda:0', grad_fn=<NllLossBackward>)



 31%|███       | 13/42 [00:10<00:23,  1.25it/s][A

loss: tensor(0.6424, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 14/42 [00:11<00:22,  1.26it/s][A

loss: tensor(0.5847, device='cuda:0', grad_fn=<NllLossBackward>)



 36%|███▌      | 15/42 [00:12<00:22,  1.22it/s][A

loss: tensor(0.6067, device='cuda:0', grad_fn=<NllLossBackward>)



 38%|███▊      | 16/42 [00:12<00:20,  1.24it/s][A

loss: tensor(0.6095, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 17/42 [00:13<00:19,  1.26it/s][A

loss: tensor(0.6463, device='cuda:0', grad_fn=<NllLossBackward>)



 43%|████▎     | 18/42 [00:14<00:19,  1.23it/s][A

loss: tensor(0.5932, device='cuda:0', grad_fn=<NllLossBackward>)



 45%|████▌     | 19/42 [00:15<00:18,  1.24it/s][A

loss: tensor(0.6799, device='cuda:0', grad_fn=<NllLossBackward>)



 48%|████▊     | 20/42 [00:16<00:17,  1.25it/s][A

loss: tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)



 50%|█████     | 21/42 [00:16<00:16,  1.26it/s][A

loss: tensor(0.7285, device='cuda:0', grad_fn=<NllLossBackward>)



 52%|█████▏    | 22/42 [00:17<00:16,  1.22it/s][A

loss: tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)



 55%|█████▍    | 23/42 [00:18<00:15,  1.25it/s][A

loss: tensor(0.8408, device='cuda:0', grad_fn=<NllLossBackward>)



 57%|█████▋    | 24/42 [00:19<00:14,  1.26it/s][A

loss: tensor(0.7412, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 25/42 [00:20<00:13,  1.26it/s][A

loss: tensor(0.6022, device='cuda:0', grad_fn=<NllLossBackward>)



 62%|██████▏   | 26/42 [00:20<00:13,  1.23it/s][A

loss: tensor(0.8696, device='cuda:0', grad_fn=<NllLossBackward>)



 64%|██████▍   | 27/42 [00:21<00:12,  1.24it/s][A

loss: tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 28/42 [00:22<00:11,  1.25it/s][A

loss: tensor(0.7668, device='cuda:0', grad_fn=<NllLossBackward>)



 69%|██████▉   | 29/42 [00:23<00:10,  1.26it/s][A

loss: tensor(0.6398, device='cuda:0', grad_fn=<NllLossBackward>)



 71%|███████▏  | 30/42 [00:24<00:09,  1.22it/s][A

loss: tensor(0.6020, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▍  | 31/42 [00:24<00:08,  1.24it/s][A

loss: tensor(0.5963, device='cuda:0', grad_fn=<NllLossBackward>)



 76%|███████▌  | 32/42 [00:25<00:08,  1.25it/s][A

loss: tensor(0.6588, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▊  | 33/42 [00:26<00:07,  1.25it/s][A

loss: tensor(0.5529, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 34/42 [00:27<00:06,  1.22it/s][A

loss: tensor(0.5327, device='cuda:0', grad_fn=<NllLossBackward>)



 83%|████████▎ | 35/42 [00:28<00:05,  1.23it/s][A

loss: tensor(0.7399, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 36/42 [00:28<00:04,  1.24it/s][A

loss: tensor(0.7002, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 37/42 [00:29<00:03,  1.27it/s][A

loss: tensor(0.6540, device='cuda:0', grad_fn=<NllLossBackward>)



 90%|█████████ | 38/42 [00:30<00:03,  1.24it/s][A

loss: tensor(0.8260, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 39/42 [00:31<00:02,  1.25it/s][A

loss: tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▌| 40/42 [00:32<00:01,  1.27it/s][A

loss: tensor(0.8364, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 41/42 [00:32<00:00,  1.27it/s][A

loss: tensor(0.6375, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 42/42 [00:33<00:00,  1.26it/s][A

  0%|          | 0/21 [00:00<?, ?it/s][A
  5%|▍         | 1/21 [00:00<00:02,  7.99it/s][A

loss: tensor(0.9185, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6781209820792788

	Training acc: 0.804862906766341

	Training prec: 0.40170088491008954

	Training rec: 0.25655920413401506

	Training f1: 0.2634737114814955

	Current Learning rate:  0.0



 10%|▉         | 2/21 [00:00<00:02,  8.32it/s][A
 14%|█▍        | 3/21 [00:00<00:02,  8.41it/s][A
 19%|█▉        | 4/21 [00:00<00:02,  8.42it/s][A
 24%|██▍       | 5/21 [00:00<00:02,  7.91it/s][A
 29%|██▊       | 6/21 [00:00<00:01,  8.08it/s][A
 33%|███▎      | 7/21 [00:00<00:01,  8.21it/s][A
 38%|███▊      | 8/21 [00:00<00:01,  8.27it/s][A
 43%|████▎     | 9/21 [00:01<00:01,  8.33it/s][A
 48%|████▊     | 10/21 [00:01<00:01,  8.36it/s][A
 52%|█████▏    | 11/21 [00:01<00:01,  7.61it/s][A
 57%|█████▋    | 12/21 [00:01<00:01,  8.00it/s][A
 62%|██████▏   | 13/21 [00:01<00:00,  8.26it/s][A
 67%|██████▋   | 14/21 [00:01<00:00,  8.46it/s][A
 71%|███████▏  | 15/21 [00:01<00:00,  8.59it/s][A
 76%|███████▌  | 16/21 [00:01<00:00,  8.74it/s][A
 81%|████████  | 17/21 [00:02<00:00,  8.84it/s][A
 86%|████████▌ | 18/21 [00:02<00:00,  8.90it/s][A
 90%|█████████ | 19/21 [00:02<00:00,  8.94it/s][A
100%|██████████| 21/21 [00:02<00:00,  8.49it/s][A
Epoch: 100%|██████████| 35/35 [20:56<0


	Validation Loss: 0.6859388365632012

	Validation acc: 0.7986383737103446

	Validation prec: 0.3120534801137053

	Validation rec: 0.26198095375099123

	Validation f1: 0.2622532340693371





### Evaluation on the test dataset

In [31]:

############ test eval metrics ######################
nb_test_steps = 0 # Tracking variables
test_loss = []
test_acc = []
test_prec = []
test_rec = []
test_f1 = []

########################################################
for batch in tqdm(test_loader):
    batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
    t_input_ids, t_input_mask, t_token_type_ids, t_labels, t_bio_tags = batch     # unpack inputs from dataloader

    with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
        model.eval() # put model in evaluation mode for validation set
        logits = model(**{"input_ids":t_input_ids, "attention_mask":t_input_mask, "token_type_ids":t_token_type_ids}) # forward pass, calculates logit predictions

    ######################################################

    # similar to the class RobertaForToken classification in transformers: https://github.com/huggingface/transformers/blob/master/src/transformers/models/roberta/modeling_roberta.py
    t_active_loss = t_input_mask.view(-1) == 1  # either based on attention_mask (includes <CLS>, <SEP> token)
    t_active_logits = logits.view(-1, N_bio_tags)[t_active_loss] # 5 
    t_active_tags = t_bio_tags.view(-1)[t_active_loss]
    t_loss = loss_fn(t_active_logits, t_active_tags)             
    test_loss.append(t_loss.item())

    #########################################################
    logits = logits.detach().to('cpu').numpy()
    tags_ids = t_bio_tags.to('cpu').numpy()

    # calculate performance measures only on tokens and not subwords or special tokens
    tags_mask = tags_ids != -100 # only get token labels and not labels from subwords or special tokens
    pred = np.argmax(logits, axis=2)[tags_mask] #.flatten() # convert logits to list of predicted labels
    tags = tags_ids[tags_mask]#.flatten()                          

    metrics = compute_metrics(pred, tags)
    test_acc.append(metrics["accuracy"])
    test_prec.append(metrics["precision"])
    test_rec.append(metrics["recall"])
    test_f1.append(metrics["f1"])

    nb_test_steps += 1

print(F'\n\tTest Loss: {np.mean(test_loss)}')
print(F'\n\tTest acc: {np.mean(test_acc)}')
print(F'\n\tTest prec: {np.mean(test_prec)}')
print(F'\n\tTest rec: {np.mean(test_rec)}')
print(F'\n\tTest f1: {np.mean(test_f1)}')


  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
100%|██████████| 26/26 [00:03<00:00,  6.77it/s]


	Test Loss: 0.6172949912456366

	Test acc: 0.8216471325840966

	Test prec: 0.3281231828328574

	Test rec: 0.268590702991532

	Test f1: 0.2718227535248452





### bio tags back to tokens

In [32]:
# take last batch of test set:
t_input_ids, t_input_mask, t_token_type_ids, t_labels, t_bio_tags = batch 

for i in range(len(batch)):
    tags_mask = t_bio_tags[i].to("cpu").numpy() != -100 # only get token labels and not labels from subwords or special tokens
    pred = np.argmax(logits[i], axis=1)[tags_mask]
    true_tags = t_bio_tags[i][tags_mask].to("cpu").numpy()    
    

    tokens = tokenizer.convert_ids_to_tokens(t_input_ids[i])

    print("\n\nPadded Sentence:")
    print(tokens)
    print("\n")
    print("-------")
    print("true labels:")
    print(t_bio_tags[i])
    for token, true_label, pred in zip(np.array(tokens)[tags_mask], true_tags, pred):
        print(token, "\t\ttrue:", true_label, "  pred:", pred)

    
    break


    




Padded Sentence:
['<s>', ':raised_@@', 'back@@', '_of_@@', 'hand@@', ':', 'Raise', 'your', 'hand', 'if', 'you', "'ve", 'heard', 'this', 'lie', 'about', '#@@', 'diabetes', '"', 'You', "'re", 'a', 'diabetic', 'because', 'you', 'eat', 'too', 'much', 'or', 'work', 'out', 'too', 'little', '.', '</s>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']


-------
true labels:
tensor([-100,    0, -100, -100, -100, -100,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0, -100,    0,    0,    0,    0,    3,    0,
           0,    1,    2,    2,    0,    1,    2,    2,    2,    0, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100

-------------------- 
### TODO: 
    - we will need to do to get exact cause and effect span extracted from these prediction index 
    - if we need to do more to loss function so that we oercome the imbalance in the class
    
-------------------

### Save model

In [33]:
torch.save(model.state_dict(), "finetuned-cause-effect-span-cause-and-effect-35-epochs.pth")

### Load model locally

In [34]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CausalNER()
model.load_state_dict(torch.load("finetuned-cause-effect-span-cause-and-effect-35-epochs.pth"))
model.to(device)
model.eval()

You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.1.attention.self.value.bias', 'roberta.encoder.layer.6.attention.self.query.bias', 'roberta.encoder.layer.10.attention.self.query.bias', 'roberta.encoder.layer.4.attention.self.value.weight', 'roberta.encoder.layer.4.output.dense.bias', 'roberta.encoder.layer.6.output.LayerNorm.bias', 'roberta.encoder.layer.4.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.self.value.weight', 'roberta.encoder.layer.10.output.dense.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.10.attention.self.value.weight', 'roberta.encoder.layer.8.attention.output.LayerNorm.bias', 'roberta.encoder.layer.9.attention.self.value.bias', 'roberta.encoder.layer.11.attention.self.k

CausalNER(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(64001, 768, padding_idx=1)
      (position_embeddings): Embedding(130, 768)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  